/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_segment.c	8.4 (Berkeley) 12/30/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/mount.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

extern int count_lock_queue __P((void));

/*
 * Once more than MAX_ACTIVE segments are active (written since the last
 * checkpoint), lfs_vflush and lfs_segwrite force a checkpoint.
 */
#define MAX_ACTIVE	10

/*
 * Determine if it's OK to start a partial in this segment, or if we need
 * to go on to a new segment.
 */
#define	LFS_PARTIAL_FITS(fs) \
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
	1 << (fs)->lfs_fsbtodb)

/* Internal segment-writer routines. */
void	 lfs_callback __P((struct buf *));
void	 lfs_gather __P((struct lfs *, struct segment *,
	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
int	 lfs_gatherblock __P((struct segment *, struct buf *, int *));
void	 lfs_iset __P((struct inode *, daddr_t, time_t));
int	 lfs_match_data __P((struct lfs *, struct buf *));
int	 lfs_match_dindir __P((struct lfs *, struct buf *));
int	 lfs_match_indir __P((struct lfs *, struct buf *));
int	 lfs_match_tindir __P((struct lfs *, struct buf *));
void	 lfs_newseg __P((struct lfs *));
void	 lfs_shellsort __P((struct buf **, daddr_t *, register int));
void	 lfs_supercallback __P((struct buf *));
void	 lfs_updatemeta __P((struct segment *));
int	 lfs_vref __P((struct vnode *));
void	 lfs_vunref __P((struct vnode *));
void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
int	 lfs_writeseg __P((struct lfs *, struct segment *));
void	 lfs_writesuper __P((struct lfs *));
void	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
	    struct segment *sp, int dirops));

int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */

/* Statistics Counters */
#define DOSTATS
struct lfs_stats lfs_stats;

/* op values to lfs_writevnodes */
#define	VN_REG		0	/* write regular (non-dirop) vnodes */
#define	VN_DIROP	1	/* write vnodes involved in directory ops */
#define	VN_EMPTY	2	/* write vnodes that have no dirty blocks */

/*
 * Ifile and meta data blocks are not marked busy, so segment writes MUST be
 * single threaded. Currently, there are two paths into lfs_segwrite, sync()
 * and getnewbuf(). They both mark the file system busy. Lfs_vflush()
 * explicitly marks the file system busy. So lfs_segwrite is safe. I think.
 */

/*
 * Write a single vnode's dirty data and inode to the log.  If more than
 * MAX_ACTIVE partial segments are already active, fall back to a full
 * checkpoint write of the whole file system instead.  Takes and releases
 * the segment lock.  Always returns 0 (the fallback path returns
 * lfs_segwrite's status).
 */
int
lfs_vflush(vp)
	struct vnode *vp;
{
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	int error, s;

	fs = VFSTOUFS(vp->v_mount)->um_lfs;
	if (fs->lfs_nactive > MAX_ACTIVE)
		return(lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP));
	lfs_seglock(fs, SEGM_SYNC);
	sp = fs->lfs_sp;


	ip = VTOI(vp);
	/*
	 * If this vnode has no dirty blocks of its own, sweep the mount's
	 * block-less vnodes (VN_EMPTY) so their inodes still get written.
	 */
	if (vp->v_dirtyblkhd.lh_first == NULL)
		lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY);

	/*
	 * Write the file's blocks until the inode write reports nothing
	 * further to do; for the ifile itself, keep going as long as
	 * lfs_writeseg says the segment must be redone.
	 */
	do {
		do {
			if (vp->v_dirtyblkhd.lh_first != NULL)
				lfs_writefile(fs, sp, vp);
		} while (lfs_writeinode(fs, sp, ip));

	} while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);

#ifdef DOSTATS
	++lfs_stats.nwrites;
	if (sp->seg_flags & SEGM_SYNC)
		++lfs_stats.nsync_writes;
	if (sp->seg_flags & SEGM_CKP)
		++lfs_stats.ncheckpoints;
#endif
	lfs_segunlock(fs);
	return (0);
}

/*
 * Walk the mount point's vnode list writing the data and/or inode of
 * each dirty vnode, according to "op" (VN_REG, VN_DIROP or VN_EMPTY).
 * The ifile itself is never written here.  Vnodes that disappear from
 * the mount mid-scan force a restart of the whole list.
 */
void
lfs_writevnodes(fs, mp, sp, op)
	struct lfs *fs;
	struct mount *mp;
	struct segment *sp;
	int op;
{
	struct inode *ip;
	struct vnode *vp;
	int error, s, active;

loop:	for (vp = mp->mnt_vnodelist.lh_first;
	     vp != NULL;
	     vp = vp->v_mntvnodes.le_next) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;

		/* XXX ignore dirops for now
		if (op == VN_DIROP && !(vp->v_flag & VDIROP) ||
		    op != VN_DIROP && (vp->v_flag & VDIROP))
			continue;
		*/

		/* VN_EMPTY means: only vnodes with no dirty blocks. */
		if (op == VN_EMPTY && vp->v_dirtyblkhd.lh_first)
			continue;

		if (vp->v_type == VNON)
			continue;

		/* Skip vnodes being reclaimed (VXLOCK) or unreferencable. */
		if (lfs_vref(vp))
			continue;

		/*
		 * Write the inode/file if dirty and it's not the
		 * the IFILE.
		 */
		ip = VTOI(vp);
		if ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE) ||
		    vp->v_dirtyblkhd.lh_first != NULL) &&
		    ip->i_number != LFS_IFILE_INUM) {
			if (vp->v_dirtyblkhd.lh_first != NULL)
				lfs_writefile(fs, sp, vp);
			(void) lfs_writeinode(fs, sp, ip);
		}
		vp->v_flag &= ~VDIROP;
		lfs_vunref(vp);
	}
}

/*
 * Write a partial segment containing all dirty vnodes on the mount; if
 * SEGM_CKP is requested (or too many segments are active) write a full
 * checkpoint, rewriting the ifile until it is stable.  Blocks while
 * fewer than two clean segments remain, so the cleaner can catch up.
 * Returns 0 on success or the error from tsleep.
 */
int
lfs_segwrite(mp, flags)
	struct mount *mp;
	int flags;			/* Do a checkpoint. */
{
	struct buf *bp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	SEGUSE *segusep;
	daddr_t ibno;
	CLEANERINFO *cip;
	int clean, error, i, s;
	int do_ckp;

	fs = VFSTOUFS(mp)->um_lfs;

	/*
	 * If we have fewer than 2 clean segments, wait until cleaner
	 * writes.
	 */
	do {
		LFS_CLEANERINFO(cip, fs, bp);
		clean = cip->clean;
		brelse(bp);
		if (clean <= 2) {
			printf ("segs clean: %d\n", clean);
			wakeup(&lfs_allclean_wakeup);
			if (error = tsleep(&fs->lfs_avail, PRIBIO + 1,
			    "lfs writer", 0))
				return (error);
		}
	} while (clean <= 2 );

	/*
	 * Allocate a segment structure and enough space to hold pointers to
	 * the maximum possible number of buffers which can be described in a
	 * single summary block.
	 */
	do_ckp = flags & SEGM_CKP || fs->lfs_nactive > MAX_ACTIVE;
	lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0));
	sp = fs->lfs_sp;

	lfs_writevnodes(fs, mp, sp, VN_REG);

	/* XXX ignore ordering of dirops for now */
	/* XXX
	fs->lfs_writer = 1;
	if (fs->lfs_dirops && (error =
	    tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) {
		free(sp->bpp, M_SEGMENT);
		free(sp, M_SEGMENT);
		fs->lfs_writer = 0;
		return (error);
	}

	lfs_writevnodes(fs, mp, sp, VN_DIROP);
	*/

	/*
	 * If we are doing a checkpoint, mark everything since the
	 * last checkpoint as no longer ACTIVE.
	 */
	if (do_ckp)
		for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
		     --ibno >= fs->lfs_cleansz; ) {
			if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
			    NOCRED, &bp))

				panic("lfs: ifile read");
			segusep = (SEGUSE *)bp->b_data;
			for (i = fs->lfs_sepb; i--; segusep++)
				segusep->su_flags &= ~SEGUSE_ACTIVE;

			error = VOP_BWRITE(bp);
		}

	if (do_ckp || fs->lfs_doifile) {
		/*
		 * For checkpoints, rewrite the ifile until writing the
		 * segment no longer dirties it again.
		 */
redo:
		vp = fs->lfs_ivnode;
		while (vget(vp, 1));
		ip = VTOI(vp);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			lfs_writefile(fs, sp, vp);
		(void)lfs_writeinode(fs, sp, ip);
		vput(vp);
		if (lfs_writeseg(fs, sp) && do_ckp)
			goto redo;
	} else
		(void) lfs_writeseg(fs, sp);

	/*
	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
	 * moment, the user's process hangs around so we can sleep.
	 */
	/* XXX ignore dirops for now
	fs->lfs_writer = 0;
	fs->lfs_doifile = 0;
	wakeup(&fs->lfs_dirops);
	*/

#ifdef DOSTATS
	++lfs_stats.nwrites;
	if (sp->seg_flags & SEGM_SYNC)
		++lfs_stats.nsync_writes;
	if (sp->seg_flags & SEGM_CKP)
		++lfs_stats.ncheckpoints;
#endif
	lfs_segunlock(fs);
	return (0);
}

/*
 * Write the dirty blocks associated with a vnode.
 */
void
lfs_writefile(fs, sp, vp)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
{
	struct buf *bp;
	struct finfo *fip;
	IFILE *ifp;

	/* Not enough room for even one block or a FINFO: flush first. */
	if (sp->seg_bytes_left < fs->lfs_bsize ||
	    sp->sum_bytes_left < sizeof(struct finfo))
		(void) lfs_writeseg(fs, sp);

	/*
	 * Charge the summary for an empty FINFO.  The sizeof(daddr_t)
	 * subtraction accounts for the block-address slot that is part
	 * of struct finfo itself (presumably fi_blocks[1] in lfs.h —
	 * the same adjustment is made throughout this file).
	 */
	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);
	++((SEGSUM *)(sp->segsum))->ss_nfinfo;

	fip = sp->fip;
	fip->fi_nblocks = 0;
	fip->fi_ino = VTOI(vp)->i_number;
	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
	fip->fi_version = ifp->if_version;
	brelse(bp);

	/*
	 * It may not be necessary to write the meta-data blocks at this point,
	 * as the roll-forward recovery code should be able to reconstruct the
	 * list.
	 */
	lfs_gather(fs, sp, vp, lfs_match_data);
	lfs_gather(fs, sp, vp, lfs_match_indir);
	lfs_gather(fs, sp, vp, lfs_match_dindir);
#ifdef TRIPLE
	lfs_gather(fs, sp, vp, lfs_match_tindir);
#endif

	/*
	 * Advance past this FINFO if it gathered any blocks; otherwise
	 * give back the space and undo the ss_nfinfo bump above.
	 */
	fip = sp->fip;
	if (fip->fi_nblocks != 0) {
		sp->fip =
		    (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
		    sizeof(daddr_t) * (fip->fi_nblocks - 1));
		sp->start_lbp = &sp->fip->fi_blocks[0];
	} else {
		sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t);
		--((SEGSUM *)(sp->segsum))->ss_nfinfo;
	}
}

/*
 * Copy the inode's dinode into the segment's current inode block,
 * allocating a new inode block (and possibly starting a new segment)
 * as needed, then record the new disk address in the ifile and debit
 * the segment that held the previous copy.  Returns non-zero when the
 * ifile itself was moved and must be rewritten by the caller.
 */
int
lfs_writeinode(fs, sp, ip)
	struct lfs *fs;
	struct segment *sp;
	struct inode *ip;
{
	struct buf *bp, *ibp;
	IFILE *ifp;
	SEGUSE *sup;
	daddr_t daddr;
	ino_t ino;
	int error, i, ndx;
	int redo_ifile = 0;

	/* Nothing to do unless the inode is dirty in some way. */
	if (!(ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)))
		return(0);

	/* Allocate a new inode block if necessary. */
	if (sp->ibp == NULL) {
		/* Allocate a new segment if necessary. */
		if (sp->seg_bytes_left < fs->lfs_bsize ||
		    sp->sum_bytes_left < sizeof(daddr_t))
			(void) lfs_writeseg(fs, sp);

		/* Get next inode block. */
		daddr = fs->lfs_offset;
		fs->lfs_offset += fsbtodb(fs, 1);
		sp->ibp = *sp->cbpp++ =
		    lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
		    fs->lfs_bsize);
		/* Zero out inode numbers */
		for (i = 0; i < INOPB(fs); ++i)
			((struct dinode *)sp->ibp->b_data)[i].di_inumber = 0;
		++sp->start_bpp;
		fs->lfs_avail -= fsbtodb(fs, 1);
		/* Set remaining space counters. */
		sp->seg_bytes_left -= fs->lfs_bsize;
		sp->sum_bytes_left -= sizeof(daddr_t);
		/*
		 * Inode-block addresses are stored growing down from the
		 * end of the summary block.
		 */
		ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
		    sp->ninodes / INOPB(fs) - 1;
		((daddr_t *)(sp->segsum))[ndx] = daddr;
	}

	/* Update the inode times and copy the inode onto the inode page. */
	if (ip->i_flag & IN_MODIFIED)
		--fs->lfs_uinodes;
	ITIMES(ip, &time, &time);
	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
	bp = sp->ibp;
	((struct dinode *)bp->b_data)[sp->ninodes % INOPB(fs)] = ip->i_din;
	/* Increment inode count in segment summary block. */
	++((SEGSUM *)(sp->segsum))->ss_ninos;

	/* If this page is full, set flag to allocate a new page. */
	if (++sp->ninodes % INOPB(fs) == 0)
		sp->ibp = NULL;

	/*
	 * If updating the ifile, update the super-block.  Update the disk
	 * address and access times for this inode in the ifile.
	 */
	ino = ip->i_number;
	if (ino == LFS_IFILE_INUM) {
		daddr = fs->lfs_idaddr;
		fs->lfs_idaddr = bp->b_blkno;
	} else {
		LFS_IENTRY(ifp, fs, ino, ibp);
		daddr = ifp->if_daddr;
		ifp->if_daddr = bp->b_blkno;
		error = VOP_BWRITE(ibp);
	}

	/*
	 * No need to update segment usage if there was no former inode address
	 * or if the last inode address is in the current partial segment.
	 */
	if (daddr != LFS_UNUSED_DADDR &&
	    !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
		if (sup->su_nbytes < sizeof(struct dinode)) {
			/* XXX -- Change to a panic. */
			printf("lfs: negative bytes (segment %d)\n",
			    datosn(fs, daddr));
			panic("negative bytes");
		}
#endif
		sup->su_nbytes -= sizeof(struct dinode);
		/*
		 * The ifile moved but its segment-usage buffer was not
		 * gathered into this write: the caller must redo it.
		 */
		redo_ifile =
		    (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
		error = VOP_BWRITE(bp);
	}
	return (redo_ifile);
}

/*
 * Add a single buffer to the current partial segment, writing the
 * segment out first when it has no room left.  Returns 1 if the segment
 * was written (the caller must rescan the vnode's dirty list), else 0.
 */
int
lfs_gatherblock(sp, bp, sptr)
	struct segment *sp;
	struct buf *bp;
	int *sptr;
{
	struct lfs *fs;
	int version;

	/*
	 * If full, finish this segment.  We may be doing I/O, so
	 * release and reacquire the splbio().
	 */
#ifdef DIAGNOSTIC
	if (sp->vp == NULL)
		panic ("lfs_gatherblock: Null vp in segment");
#endif
	fs = sp->fs;
	if (sp->sum_bytes_left < sizeof(daddr_t) ||
	    sp->seg_bytes_left < fs->lfs_bsize) {
		if (sptr)
			splx(*sptr);
		lfs_updatemeta(sp);

		/* Carry the current file's FINFO into the new segment. */
		version = sp->fip->fi_version;
		(void) lfs_writeseg(fs, sp);

		sp->fip->fi_version = version;
		sp->fip->fi_ino = VTOI(sp->vp)->i_number;
		/* Add the current file to the segment summary. */
		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
		sp->sum_bytes_left -=
		    sizeof(struct finfo) - sizeof(daddr_t);

		if (sptr)
			*sptr = splbio();
		return(1);
	}

	/* Insert into the buffer list, update the FINFO block. */
	bp->b_flags |= B_GATHERED;
	*sp->cbpp++ = bp;
	sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;

	sp->sum_bytes_left -= sizeof(daddr_t);
	sp->seg_bytes_left -= fs->lfs_bsize;
	return(0);
}

/*
 * Gather every dirty buffer on the vnode that satisfies the match
 * function into the current partial segment, then assign disk addresses
 * for the blocks just gathered (lfs_updatemeta).  Runs at splbio while
 * walking the dirty list.
 */
void
lfs_gather(fs, sp, vp, match)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	int (*match) __P((struct lfs *, struct buf *));
{
	struct buf *bp;
	int s;

	sp->vp = vp;
	s = splbio();
loop:	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {
		if (bp->b_flags & B_BUSY || !match(fs, bp) ||
		    bp->b_flags & B_GATHERED)
			continue;
#ifdef DIAGNOSTIC
		if (!(bp->b_flags & B_DELWRI))
			panic("lfs_gather: bp not B_DELWRI");
		if (!(bp->b_flags & B_LOCKED))
			panic("lfs_gather: bp not B_LOCKED");
#endif
		/* A segment write may have invalidated the list; rescan. */
		if (lfs_gatherblock(sp, bp, &s))
			goto loop;
	}
	splx(s);
	lfs_updatemeta(sp);
	sp->vp = NULL;
}


/*
 * Update the metadata that points to the blocks listed in the FINFO
 * array.
 */
void
lfs_updatemeta(sp)
	struct segment *sp;
{
	SEGUSE *sup;
	struct buf *bp;
	struct lfs *fs;
	struct vnode *vp;
	struct indir a[NIADDR + 2], *ap;
	struct inode *ip;
	daddr_t daddr, lbn, off;
	int db_per_fsb, error, i, nblocks, num;

	vp = sp->vp;
	nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
	if (vp == NULL || nblocks == 0)
		return;

	/* Sort the blocks. */
	if (!(sp->seg_flags & SEGM_CLEAN))
		lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);

	/*
	 * Assign disk addresses, and update references to the logical
	 * block and the segment usage information.
	 */
	fs = sp->fs;
	db_per_fsb = fsbtodb(fs, 1);
	for (i = nblocks; i--; ++sp->start_bpp) {
		lbn = *sp->start_lbp++;
		(*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
		fs->lfs_offset += db_per_fsb;

		/* Find where the old pointer to this block lives. */
		if (error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL))
			panic("lfs_updatemeta: ufs_bmaparray %d", error);
		ip = VTOI(vp);
		switch (num) {
		case 0:
			/* Pointer lives in the inode's direct array. */
			ip->i_db[lbn] = off;
			break;
		case 1:
			/* Pointer lives in the inode's indirect array. */
			ip->i_ib[a[0].in_off] = off;
			break;
		default:
			/* Pointer lives in an indirect block: patch it. */
			ap = &a[num - 1];
			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
				panic("lfs_updatemeta: bread bno %d",
				    ap->in_lbn);
			/*
			 * Bread may create a new indirect block which needs
			 * to get counted for the inode.
			 */
			if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) {
				printf ("Updatemeta allocating indirect block: shouldn't happen\n");
				ip->i_blocks += btodb(fs->lfs_bsize);
				fs->lfs_bfree -= btodb(fs->lfs_bsize);
			}
			((daddr_t *)bp->b_data)[ap->in_off] = off;
			VOP_BWRITE(bp);
		}

		/* Update segment usage information. */
		if (daddr != UNASSIGNED &&
		    !(daddr >= fs->lfs_lastpseg && daddr <= off)) {
			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
			if (sup->su_nbytes < fs->lfs_bsize) {
				/* XXX -- Change to a panic. */
				printf("lfs: negative bytes (segment %d)\n",
				    datosn(fs, daddr));
				panic ("Negative Bytes");
			}
#endif
			sup->su_nbytes -= fs->lfs_bsize;
			error = VOP_BWRITE(bp);
		}
	}
}

/*
 * Start a new segment.
 */
int
lfs_initseg(fs)
	struct lfs *fs;
{
	struct segment *sp;
	SEGUSE *sup;
	SEGSUM *ssp;
	struct buf *bp;
	daddr_t lbn, *lbnp;
	int repeat;

	sp = fs->lfs_sp;

	repeat = 0;
	/* Advance to the next segment. */
	if (!LFS_PARTIAL_FITS(fs)) {
		/* Wake up any cleaning procs waiting on this file system. */
		wakeup(&lfs_allclean_wakeup);

		lfs_newseg(fs);
		repeat = 1;
		fs->lfs_offset = fs->lfs_curseg;
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;

		/*
		 * If the segment contains a superblock, update the offset
		 * and summary address to skip over it.
		 */
		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
			sp->seg_bytes_left -= LFS_SBPAD;
		}
		brelse(bp);
	} else {
		/* A partial still fits: continue in the current segment. */
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = (fs->lfs_dbpseg -
		    (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
	}
	fs->lfs_lastpseg = fs->lfs_offset;

	sp->fs = fs;
	sp->ibp = NULL;
	sp->ninodes = 0;

	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
	sp->cbpp = sp->bpp;
	*sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset,
	    LFS_SUMMARY_SIZE);
	sp->segsum = (*sp->cbpp)->b_data;
	bzero(sp->segsum, LFS_SUMMARY_SIZE);
	sp->start_bpp = ++sp->cbpp;
	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;

	/* Set point to SEGSUM, initialize it. */
	ssp = sp->segsum;
	ssp->ss_next = fs->lfs_nextseg;
	ssp->ss_nfinfo = ssp->ss_ninos = 0;

	/* Set pointer to first FINFO, initialize it. */
	sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM));
	sp->fip->fi_nblocks = 0;
	sp->start_lbp = &sp->fip->fi_blocks[0];

	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
	sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);

	return(repeat);
}

/*
 * Return the next segment to write.
 */
void
lfs_newseg(fs)
	struct lfs *fs;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	int curseg, error, isdirty, sn;

	/*
	 * Mark the segment we are moving into (lfs_nextseg) dirty and
	 * active, and reset its usage counts.
	 */
	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
	sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
	sup->su_nbytes = 0;
	sup->su_nsums = 0;
	sup->su_ninos = 0;
	(void) VOP_BWRITE(bp);

	/* Account for one fewer clean segment. */
	LFS_CLEANERINFO(cip, fs, bp);
	--cip->clean;
	++cip->dirty;
	(void) VOP_BWRITE(bp);

	fs->lfs_lastseg = fs->lfs_curseg;
	fs->lfs_curseg = fs->lfs_nextseg;
	/* Scan forward for the next clean segment to use after this one. */
	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
		sn = (sn + 1) % fs->lfs_nseg;
		if (sn == curseg)
			panic("lfs_nextseg: no clean segments");
		LFS_SEGENTRY(sup, fs, sn, bp);
		isdirty = sup->su_flags & SEGUSE_DIRTY;
		brelse(bp);
		if (!isdirty)
			break;
	}

	++fs->lfs_nactive;
	fs->lfs_nextseg = sntoda(fs, sn);
#ifdef DOSTATS
	++lfs_stats.segsused;
#endif
}

/*
 * Write the current partial segment to disk: update the segment usage
 * table, checksum the data and summary blocks, then copy the gathered
 * buffers into MAXPHYS-sized chunks and hand each chunk to the device
 * strategy routine.  Returns non-zero if the caller should write again
 * (a new segment was started, or the segment-usage buffer was not
 * gathered into this write).
 */
int
lfs_writeseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	extern int locked_queue_count;
	struct buf **bpp, *bp, *cbp;
	SEGUSE *sup;
	SEGSUM *ssp;
	dev_t i_dev;
	size_t size;
	u_long *datap, *dp;
	int ch_per_blk, do_again, error, i, nblocks, num, s;
	int (*strategy)__P((struct vop_strategy_args *));
	struct vop_strategy_args vop_strategy_a;
	u_short ninos;
	char *p;

	/*
	 * If there are no buffers other than the segment summary to write
	 * and it is not a checkpoint, don't do anything.  On a checkpoint,
	 * even if there aren't any buffers, you need to write the superblock.
	 */
	if ((nblocks = sp->cbpp - sp->bpp) == 1)
		return (0);

	ssp = (SEGSUM *)sp->segsum;

	/* Update the segment usage information. */
	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
	ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
	/*
	 * Precedence note: this parses as (nblocks - 1 - ninos) << bshift,
	 * i.e. the file-data blocks (total minus the summary block and the
	 * inode blocks) converted to bytes.
	 */
	sup->su_nbytes += nblocks - 1 - ninos << fs->lfs_bshift;
	sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
	sup->su_nbytes += LFS_SUMMARY_SIZE;
	sup->su_lastmod = time.tv_sec;
	sup->su_ninos += ninos;
	++sup->su_nsums;
	do_again = !(bp->b_flags & B_GATHERED);
	(void)VOP_BWRITE(bp);
	/*
	 * Compute checksum across data and then across summary; the first
	 * block (the summary block) is skipped.  Set the create time here
	 * so that it's guaranteed to be later than the inode mod times.
	 *
	 * XXX
	 * Fix this to do it inline, instead of malloc/copy.
	 */
	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
	for (bpp = sp->bpp, i = nblocks - 1; i--;) {
		/* Cleaner "fake" buffers hold their data in user space. */
		if ((*++bpp)->b_flags & B_INVAL) {
			if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long)))
				panic("lfs_writeseg: copyin failed");
		} else
			*dp++ = ((u_long *)(*bpp)->b_data)[0];
	}
	ssp->ss_create = time.tv_sec;
	ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
	ssp->ss_sumsum =
	    cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
	free(datap, M_SEGMENT);
#ifdef DIAGNOSTIC
	if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE)
		panic("lfs_writeseg: No diskspace for summary");
#endif
	fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/*
	 * When we simply write the blocks we lose a rotation for every block
	 * written.  To avoid this problem, we allocate memory in chunks, copy
	 * the buffers into the chunk and write the chunk.  MAXPHYS is the
	 * largest size I/O devices can handle.
	 * When the data is copied to the chunk, turn off the the B_LOCKED bit
	 * and brelse the buffer (which will move them to the LRU list).  Add
	 * the B_CALL flag to the buffer header so we can count I/O's for the
	 * checkpoints and so we can release the allocated memory.
	 *
	 * XXX
	 * This should be removed if the new virtual memory system allows us to
	 * easily make the buffers contiguous in kernel memory and if that's
	 * fast enough.
	 */
	ch_per_blk = MAXPHYS / fs->lfs_bsize;
	for (bpp = sp->bpp, i = nblocks; i;) {
		num = ch_per_blk;
		if (num > i)
			num = i;
		i -= num;
		size = num * fs->lfs_bsize;

		cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
		    (*bpp)->b_blkno, size);
		cbp->b_dev = i_dev;
		cbp->b_flags |= B_ASYNC | B_BUSY;

		s = splbio();
		++fs->lfs_iocount;
		for (p = cbp->b_data; num--;) {
			bp = *bpp++;
			/*
			 * Fake buffers from the cleaner are marked as B_INVAL.
			 * We need to copy the data from user space rather than
			 * from the buffer indicated.
			 * XXX == what do I do on an error?
			 */
			if (bp->b_flags & B_INVAL) {
				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
					panic("lfs_writeseg: copyin failed");
			} else
				bcopy(bp->b_data, p, bp->b_bcount);
			p += bp->b_bcount;
			if (bp->b_flags & B_LOCKED)
				--locked_queue_count;
			bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
			    B_LOCKED | B_GATHERED);
			if (bp->b_flags & B_CALL) {
				/* if B_CALL, it was created with newbuf */
				brelvp(bp);
				if (!(bp->b_flags & B_INVAL))
					free(bp->b_data, M_SEGMENT);
				free(bp, M_SEGMENT);
			} else {
				bremfree(bp);
				bp->b_flags |= B_DONE;
				reassignbuf(bp, bp->b_vp);
				brelse(bp);
			}
		}
		++cbp->b_vp->v_numoutput;
		splx(s);
		cbp->b_bcount = p - (char *)cbp->b_data;
		/*
		 * XXXX This is a gross and disgusting hack.  Since these
		 * buffers are physically addressed, they hang off the
		 * device vnode (devvp).  As a result, they have no way
		 * of getting to the LFS superblock or lfs structure to
		 * keep track of the number of I/O's pending.  So, I am
		 * going to stuff the fs into the saveaddr field of
		 * the buffer (yuk).
		 */
		cbp->b_saveaddr = (caddr_t)fs;
		vop_strategy_a.a_desc = VDESC(vop_strategy);
		vop_strategy_a.a_bp = cbp;
		(strategy)(&vop_strategy_a);
	}
	/*
	 * XXX
	 * Vinvalbuf can move locked buffers off the locked queue
	 * and we have no way of knowing about this.  So, after
	 * doing a big write, we recalculate how many bufers are
	 * really still left on the locked queue.
	 */
	locked_queue_count = count_lock_queue();
	wakeup(&locked_queue_count);
#ifdef DOSTATS
	++lfs_stats.psegwrites;
	lfs_stats.blocktot += nblocks - 1;
	if (fs->lfs_sp->seg_flags & SEGM_SYNC)
		++lfs_stats.psyncwrites;
	if (fs->lfs_sp->seg_flags & SEGM_CLEAN) {
		++lfs_stats.pcleanwrites;
		lfs_stats.cleanblocks += nblocks - 1;
	}
#endif
	return (lfs_initseg(fs) || do_again);
}

/*
 * Checksum the in-core superblock and write a copy of it,
 * asynchronously, to the first superblock location on the device.
 */
void
lfs_writesuper(fs)
	struct lfs *fs;
{
	struct buf *bp;
	dev_t i_dev;
	int (*strategy) __P((struct vop_strategy_args *));
	int s;
	struct vop_strategy_args vop_strategy_a;

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/* Checksum the superblock and copy it into a buffer. */
	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
	bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0],
	    LFS_SBPAD);
	*(struct lfs *)bp->b_data = *fs;

	/* XXX Toggle between first two superblocks; for now just write first */
	bp->b_dev = i_dev;
	bp->b_flags |= B_BUSY | B_CALL | B_ASYNC;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	bp->b_iodone = lfs_supercallback;
	vop_strategy_a.a_desc = VDESC(vop_strategy);
	vop_strategy_a.a_bp = bp;
	s = splbio();
	++bp->b_vp->v_numoutput;
	splx(s);
	(strategy)(&vop_strategy_a);
}

/*
 * Logical block number match routines used when traversing the dirty block
 * chain.
 */
int
lfs_match_data(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	/* Non-negative logical block numbers are file data. */
	return (bp->b_lblkno >= 0);
}

int
lfs_match_indir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
}

int
lfs_match_dindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
}

int
lfs_match_tindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
}

/*
 * Allocate a new buffer header.
 */
struct buf *
lfs_newbuf(vp, daddr, size)
	struct vnode *vp;
	daddr_t daddr;
	size_t size;
{
	struct buf *bp;
	size_t nbytes;

	nbytes = roundup(size, DEV_BSIZE);
	bp = malloc(sizeof(struct buf), M_SEGMENT, M_WAITOK);
	bzero(bp, sizeof(struct buf));
	/* A zero size leaves b_data NULL (cleared by the bzero above). */
	if (nbytes)
		bp->b_data = malloc(nbytes, M_SEGMENT, M_WAITOK);
	bgetvp(vp, bp);
	bp->b_bufsize = size;
	bp->b_bcount = size;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	/* B_CALL routes completion to lfs_callback for I/O accounting. */
	bp->b_iodone = lfs_callback;
	bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE;
	return (bp);
}

/*
 * I/O completion callback for segment writes: decrement the file
 * system's pending-I/O count, waking any waiter when it reaches zero,
 * then free the malloc'd buffer.  The lfs pointer was stashed in
 * b_saveaddr by lfs_writeseg.
 */
void
lfs_callback(bp)
	struct buf *bp;
{
	struct lfs *fs;

	fs = (struct lfs *)bp->b_saveaddr;
#ifdef DIAGNOSTIC
	if (fs->lfs_iocount == 0)
		panic("lfs_callback: zero iocount\n");
#endif
	if (--fs->lfs_iocount == 0)
		wakeup(&fs->lfs_iocount);

	brelvp(bp);
	free(bp->b_data, M_SEGMENT);
	free(bp, M_SEGMENT);
}

/*
 * I/O completion callback for superblock writes: just release the
 * buffer; no lfs_iocount accounting is done for the superblock.
 */
void
lfs_supercallback(bp)
	struct buf *bp;
{
	brelvp(bp);
	free(bp->b_data, M_SEGMENT);
	free(bp, M_SEGMENT);
}

/*
 * Shellsort (diminishing increment sort) from Data Structures and
 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
 * see also Knuth Vol. 3, page 84.  The increments are selected from
 * formula (8), page 95.  Roughly O(N^3/2).
 */
/*
 * This is our own private copy of shellsort because we want to sort
 * two parallel arrays (the array of buffer pointers and the array of
 * logical block numbers) simultaneously.  Note that we cast the array
 * of logical block numbers to a unsigned in this routine so that the
 * negative block numbers (meta data blocks) sort AFTER the data blocks.
1036 */ 1037 void 1038 lfs_shellsort(bp_array, lb_array, nmemb) 1039 struct buf **bp_array; 1040 daddr_t *lb_array; 1041 register int nmemb; 1042 { 1043 static int __rsshell_increments[] = { 4, 1, 0 }; 1044 register int incr, *incrp, t1, t2; 1045 struct buf *bp_temp; 1046 u_long lb_temp; 1047 1048 for (incrp = __rsshell_increments; incr = *incrp++;) 1049 for (t1 = incr; t1 < nmemb; ++t1) 1050 for (t2 = t1 - incr; t2 >= 0;) 1051 if (lb_array[t2] > lb_array[t2 + incr]) { 1052 lb_temp = lb_array[t2]; 1053 lb_array[t2] = lb_array[t2 + incr]; 1054 lb_array[t2 + incr] = lb_temp; 1055 bp_temp = bp_array[t2]; 1056 bp_array[t2] = bp_array[t2 + incr]; 1057 bp_array[t2 + incr] = bp_temp; 1058 t2 -= incr; 1059 } else 1060 break; 1061 } 1062 1063 /* 1064 * Check VXLOCK. Return 1 if the vnode is locked. Otherwise, vget it. 1065 */ 1066 lfs_vref(vp) 1067 register struct vnode *vp; 1068 { 1069 1070 if (vp->v_flag & VXLOCK) 1071 return(1); 1072 return (vget(vp, 0)); 1073 } 1074 1075 void 1076 lfs_vunref(vp) 1077 register struct vnode *vp; 1078 { 1079 extern int lfs_no_inactive; 1080 1081 /* 1082 * This is vrele except that we do not want to VOP_INACTIVE 1083 * this vnode. Rather than inline vrele here, we use a global 1084 * flag to tell lfs_inactive not to run. Yes, its gross. 1085 */ 1086 lfs_no_inactive = 1; 1087 vrele(vp); 1088 lfs_no_inactive = 0; 1089 } 1090