1 /* 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)lfs_segment.c 8.10 (Berkeley) 06/10/95 8 */ 9 10 #include <sys/param.h> 11 #include <sys/systm.h> 12 #include <sys/namei.h> 13 #include <sys/kernel.h> 14 #include <sys/resourcevar.h> 15 #include <sys/file.h> 16 #include <sys/stat.h> 17 #include <sys/buf.h> 18 #include <sys/proc.h> 19 #include <sys/conf.h> 20 #include <sys/vnode.h> 21 #include <sys/malloc.h> 22 #include <sys/mount.h> 23 24 #include <miscfs/specfs/specdev.h> 25 #include <miscfs/fifofs/fifo.h> 26 27 #include <ufs/ufs/quota.h> 28 #include <ufs/ufs/inode.h> 29 #include <ufs/ufs/dir.h> 30 #include <ufs/ufs/ufsmount.h> 31 #include <ufs/ufs/ufs_extern.h> 32 33 #include <ufs/lfs/lfs.h> 34 #include <ufs/lfs/lfs_extern.h> 35 36 extern int count_lock_queue __P((void)); 37 38 #define MAX_ACTIVE 10 39 /* 40 * Determine if it's OK to start a partial in this segment, or if we need 41 * to go on to a new segment. 
 */
/*
 * The left-hand side is the space remaining in the current segment,
 * lfs_dbpseg - (lfs_offset - lfs_curseg); lfs_initseg() multiplies the same
 * expression by DEV_BSIZE to obtain bytes, so it is in DEV_BSIZE units.
 * The right-hand side, 1 << lfs_fsbtodb, is presumably the number of
 * device blocks in one file system block -- TODO confirm against lfs.h.
 */
#define	LFS_PARTIAL_FITS(fs) \
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
	1 << (fs)->lfs_fsbtodb)

/*
 * Forward declarations for the routines in this file.
 * NOTE(review): lfs_iset is declared here but no definition is visible in
 * this file.
 */
void	 lfs_callback __P((struct buf *));
void	 lfs_gather __P((struct lfs *, struct segment *,
	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
int	 lfs_gatherblock __P((struct segment *, struct buf *, int *));
void	 lfs_iset __P((struct inode *, ufs_daddr_t, time_t));
int	 lfs_match_data __P((struct lfs *, struct buf *));
int	 lfs_match_dindir __P((struct lfs *, struct buf *));
int	 lfs_match_indir __P((struct lfs *, struct buf *));
int	 lfs_match_tindir __P((struct lfs *, struct buf *));
void	 lfs_newseg __P((struct lfs *));
void	 lfs_shellsort __P((struct buf **, ufs_daddr_t *, register int));
void	 lfs_supercallback __P((struct buf *));
void	 lfs_updatemeta __P((struct segment *));
int	 lfs_vref __P((struct vnode *));
void	 lfs_vunref __P((struct vnode *));
void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
int	 lfs_writeseg __P((struct lfs *, struct segment *));
void	 lfs_writesuper __P((struct lfs *));
/* The last argument is one of the VN_* op values defined below. */
void	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
	    struct segment *sp, int dirops));

int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */

/* Statistics Counters */
#define DOSTATS
struct lfs_stats lfs_stats;

/*
 * op values to lfs_writevnodes:
 *	VN_REG		no op-specific filtering is applied.
 *	VN_DIROP	directory-operation vnodes; the filter for this value
 *			is currently commented out in lfs_writevnodes.
 *	VN_EMPTY	only vnodes that have no dirty buffers.
 */
#define	VN_REG		0
#define	VN_DIROP	1
#define	VN_EMPTY	2

/*
 * Ifile and meta data blocks are not marked busy, so segment writes MUST be
 * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
 * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
 * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
 */

/*
 * Flush a single vnode to the log.
 *
 * If more than MAX_ACTIVE segments are active, a full synchronous
 * checkpoint is done through lfs_segwrite() instead and its result is
 * returned.  Otherwise the segment lock is taken with SEGM_SYNC, the
 * vnode's dirty blocks and inode are written, and 0 is returned.
 */
int
lfs_vflush(vp)
	struct vnode *vp;
{
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;

	fs = VFSTOUFS(vp->v_mount)->um_lfs;
	if (fs->lfs_nactive > MAX_ACTIVE)
		return(lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP));
	lfs_seglock(fs, SEGM_SYNC);
	sp = fs->lfs_sp;


	ip = VTOI(vp);
	/*
	 * If this vnode has no dirty blocks, also sweep the mount's other
	 * vnodes that have no dirty blocks (VN_EMPTY).
	 */
	if (vp->v_dirtyblkhd.lh_first == NULL)
		lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY);

	/*
	 * The inner loop repeats for as long as lfs_writeinode() asks for a
	 * redo; the outer loop repeats only when the vnode is the ifile and
	 * lfs_writeseg() returns non-zero.
	 */
	do {
		do {
			if (vp->v_dirtyblkhd.lh_first != NULL)
				lfs_writefile(fs, sp, vp);
		} while (lfs_writeinode(fs, sp, ip));

	} while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);

#ifdef DOSTATS
	++lfs_stats.nwrites;
	if (sp->seg_flags & SEGM_SYNC)
		++lfs_stats.nsync_writes;
	if (sp->seg_flags & SEGM_CKP)
		++lfs_stats.ncheckpoints;
#endif
	lfs_segunlock(fs);
	return (0);
}

/*
 * Walk the vnodes of mount point mp and write the dirty ones (file blocks
 * and inode) into the segment described by sp.  The ifile is never written
 * here.  op is one of the VN_* values: with VN_EMPTY, vnodes that have
 * dirty buffers are skipped; the VN_DIROP filter is currently commented out.
 */
void
lfs_writevnodes(fs, mp, sp, op)
	struct lfs *fs;
	struct mount *mp;
	struct segment *sp;
	int op;
{
	struct inode *ip;
	struct vnode *vp;

/*
 * BEGIN HACK
 * Walk the mount's vnode list from the last entry back to the first.
 * VN_OFFSET is the byte offset of v_mntvnodes.le_next within a vnode.
 * BACK_VP subtracts that offset from le_prev to recover the previous vnode,
 * which relies on le_prev pointing at the previous element's le_next field.
 * BEG_OF_VLIST is the pseudo-vnode address obtained by applying the same
 * arithmetic to the list head; reaching it ends the walk.
 */
#define	VN_OFFSET	(((void *)&vp->v_mntvnodes.le_next) - (void *)vp)
#define	BACK_VP(VP)	((struct vnode *)(((void *)VP->v_mntvnodes.le_prev) - VN_OFFSET))
#define	BEG_OF_VLIST	((struct vnode *)(((void *)&mp->mnt_vnodelist.lh_first) - VN_OFFSET))

/* Find last vnode. */
loop:	for (vp = mp->mnt_vnodelist.lh_first;
	     vp && vp->v_mntvnodes.le_next != NULL;
	     vp = vp->v_mntvnodes.le_next);
	for (; vp && vp != BEG_OF_VLIST; vp = BACK_VP(vp)) {
/* END HACK */
/*
loop:
	for (vp = mp->mnt_vnodelist.lh_first;
	     vp != NULL;
	     vp = vp->v_mntvnodes.le_next) {
*/
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;

		/* XXX ignore dirops for now
		if (op == VN_DIROP && !(vp->v_flag & VDIROP) ||
		    op != VN_DIROP && (vp->v_flag & VDIROP))
			continue;
		*/

		/* VN_EMPTY: only vnodes without dirty buffers are wanted. */
		if (op == VN_EMPTY && vp->v_dirtyblkhd.lh_first)
			continue;

		if (vp->v_type == VNON)
			continue;

		/* Skip vnodes that cannot be referenced right now. */
		if (lfs_vref(vp))
			continue;

		/*
		 * Write the inode/file if dirty and it's not the IFILE.
		 */
		ip = VTOI(vp);
		if ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE) ||
		    vp->v_dirtyblkhd.lh_first != NULL) &&
		    ip->i_number != LFS_IFILE_INUM) {
			if (vp->v_dirtyblkhd.lh_first != NULL)
				lfs_writefile(fs, sp, vp);
			(void) lfs_writeinode(fs, sp, ip);
		}
		vp->v_flag &= ~VDIROP;
		lfs_vunref(vp);
	}
}

/*
 * Write all dirty vnodes of mount point mp to the log and, when a
 * checkpoint is being done or fs->lfs_doifile is set, the ifile as well.
 *
 * A checkpoint is done when flags contains SEGM_CKP or when more than
 * MAX_ACTIVE segments are active.  Returns the tsleep() error if the wait
 * for clean segments fails, otherwise 0.
 */
int
lfs_segwrite(mp, flags)
	struct mount *mp;
	int flags;			/* Do a checkpoint. */
{
	struct proc *p = curproc;	/* XXX */
	struct buf *bp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	SEGUSE *segusep;
	ufs_daddr_t ibno;
	CLEANERINFO *cip;
	int clean, do_ckp, error, i;

	fs = VFSTOUFS(mp)->um_lfs;

	/*
	 * While there are 2 or fewer clean segments, or no available
	 * space (lfs_avail <= 0), wake the cleaner and sleep on lfs_avail.
	 */
	do {
		LFS_CLEANERINFO(cip, fs, bp);
		clean = cip->clean;
		brelse(bp);
		if (clean <= 2 || fs->lfs_avail <= 0) {
			/* printf ("segs clean: %d\n", clean); */
			wakeup(&lfs_allclean_wakeup);
			wakeup(&fs->lfs_nextseg);
			if (error = tsleep(&fs->lfs_avail, PRIBIO + 1,
			    "lfs writer", 0))
				return (error);
		}
	} while (clean <= 2 || fs->lfs_avail <= 0);

	/*
	 * Decide whether this write is a checkpoint and take the segment
	 * lock.  The segment structure and its buffer-pointer array are not
	 * allocated here; presumably lfs_seglock() does that -- TODO confirm.
	 */
	do_ckp = flags & SEGM_CKP || fs->lfs_nactive > MAX_ACTIVE;
	lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0));
	sp = fs->lfs_sp;

	lfs_writevnodes(fs, mp, sp, VN_REG);

	/* XXX ignore ordering of dirops for now */
	/* XXX
	fs->lfs_writer = 1;
	if (fs->lfs_dirops && (error =
	    tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) {
		free(sp->bpp, M_SEGMENT);
		free(sp, M_SEGMENT);
		fs->lfs_writer = 0;
		return (error);
	}

	lfs_writevnodes(fs, mp, sp, VN_DIROP);
	*/

	/*
	 * If we are doing a checkpoint, mark everything since the
	 * last checkpoint as no longer ACTIVE.  The loop visits ifile
	 * blocks lfs_cleansz .. lfs_cleansz + lfs_segtabsz - 1 in
	 * descending order and clears SEGUSE_ACTIVE in each of the
	 * lfs_sepb entries per block.
	 */
	if (do_ckp)
		for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
		     --ibno >= fs->lfs_cleansz; ) {
			if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
			    NOCRED, &bp))

				panic("lfs: ifile read");
			segusep = (SEGUSE *)bp->b_data;
			for (i = fs->lfs_sepb; i--; segusep++)
				segusep->su_flags &= ~SEGUSE_ACTIVE;

			/* NOTE(review): the result is stored but never checked. */
			error = VOP_BWRITE(bp);
		}

	if (do_ckp || fs->lfs_doifile) {
redo:
		/*
		 * Write the ifile itself.  On a checkpoint this is repeated
		 * for as long as lfs_writeseg() returns non-zero.
		 */
		vp = fs->lfs_ivnode;
		while (vget(vp, LK_EXCLUSIVE, p))
			continue;
		ip = VTOI(vp);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			lfs_writefile(fs, sp, vp);
		(void)lfs_writeinode(fs, sp, ip);
		vput(vp);
		if (lfs_writeseg(fs, sp) && do_ckp)
			goto redo;
	} else
		(void) lfs_writeseg(fs, sp);

	/*
	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
	 * moment, the user's process hangs around so we can sleep.
	 * NOTE(review): no such wait appears in this function; presumably it
	 * happens in lfs_segunlock() -- TODO confirm.
	 */
	/* XXX ignore dirops for now
	fs->lfs_writer = 0;
	fs->lfs_doifile = 0;
	wakeup(&fs->lfs_dirops);
	*/

#ifdef DOSTATS
	++lfs_stats.nwrites;
	if (sp->seg_flags & SEGM_SYNC)
		++lfs_stats.nsync_writes;
	if (sp->seg_flags & SEGM_CKP)
		++lfs_stats.ncheckpoints;
#endif
	lfs_segunlock(fs);
	return (0);
}

/*
 * Write the dirty blocks associated with a vnode.
 */
/*
 * lfs_writefile: start a FINFO entry for vp in the segment summary and
 * gather the vnode's dirty data, indirect and double-indirect blocks
 * (triple-indirect only under TRIPLE) into the partial segment.  If no
 * block was gathered, the FINFO reservation is undone.
 */
void
lfs_writefile(fs, sp, vp)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
{
	struct buf *bp;
	struct finfo *fip;
	IFILE *ifp;

	/* Not enough room for a block or a FINFO: write what we have. */
	if (sp->seg_bytes_left < fs->lfs_bsize ||
	    sp->sum_bytes_left < sizeof(struct finfo))
		(void) lfs_writeseg(fs, sp);

	/*
	 * Reserve the FINFO header: sizeof(struct finfo) minus one block
	 * pointer.  Each block pointer is charged separately as blocks are
	 * gathered (see lfs_gatherblock).
	 */
	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(ufs_daddr_t);
	++((SEGSUM *)(sp->segsum))->ss_nfinfo;

	/* Fill in inode number and version; the version is read from the ifile. */
	fip = sp->fip;
	fip->fi_nblocks = 0;
	fip->fi_ino = VTOI(vp)->i_number;
	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
	fip->fi_version = ifp->if_version;
	brelse(bp);

	/*
	 * It may not be necessary to write the meta-data blocks at this point,
	 * as the roll-forward recovery code should be able to reconstruct the
	 * list.
	 */
	lfs_gather(fs, sp, vp, lfs_match_data);
	lfs_gather(fs, sp, vp, lfs_match_indir);
	lfs_gather(fs, sp, vp, lfs_match_dindir);
#ifdef TRIPLE
	lfs_gather(fs, sp, vp, lfs_match_tindir);
#endif

	/*
	 * Re-read sp->fip (gathering may have written the segment and
	 * started a new summary).  If blocks were recorded, advance sp->fip
	 * past this FINFO and its block list; otherwise give the reserved
	 * summary space back.
	 */
	fip = sp->fip;
	if (fip->fi_nblocks != 0) {
		sp->fip =
		    (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
		    sizeof(ufs_daddr_t) * (fip->fi_nblocks - 1));
		sp->start_lbp = &sp->fip->fi_blocks[0];
	} else {
		sp->sum_bytes_left += sizeof(struct finfo) - sizeof(ufs_daddr_t);
		--((SEGSUM *)(sp->segsum))->ss_nfinfo;
	}
}

/*
 * Copy the in-core inode ip into the current inode block of the partial
 * segment, allocating a new inode block when needed, and record the
 * inode's new disk address (in the superblock for the ifile, in the ifile
 * entry otherwise).  Does nothing and returns 0 if none of IN_ACCESS,
 * IN_CHANGE, IN_MODIFIED or IN_UPDATE is set.
 *
 * Returns non-zero only when the ifile's inode was written and the
 * segment-usage buffer updated for its old address was not marked
 * B_GATHERED; callers use this to decide whether to write the ifile again.
 */
int
lfs_writeinode(fs, sp, ip)
	struct lfs *fs;
	struct segment *sp;
	struct inode *ip;
{
	struct buf *bp, *ibp;
	IFILE *ifp;
	SEGUSE *sup;
	ufs_daddr_t daddr;
	ino_t ino;
	int error, i, ndx;
	int redo_ifile = 0;

	if (!(ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)))
		return(0);

	/* Allocate a new inode block if necessary. */
	if (sp->ibp == NULL) {
		/* Allocate a new segment if necessary. */
		if (sp->seg_bytes_left < fs->lfs_bsize ||
		    sp->sum_bytes_left < sizeof(ufs_daddr_t))
			(void) lfs_writeseg(fs, sp);

		/* Get next inode block. */
		daddr = fs->lfs_offset;
		fs->lfs_offset += fsbtodb(fs, 1);
		sp->ibp = *sp->cbpp++ =
		    lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
		    fs->lfs_bsize);
		/* Zero out inode numbers */
		for (i = 0; i < INOPB(fs); ++i)
			((struct dinode *)sp->ibp->b_data)[i].di_inumber = 0;
		/* The inode block is not a file block; keep it out of the sort range. */
		++sp->start_bpp;
		fs->lfs_avail -= fsbtodb(fs, 1);
		/* Set remaining space counters. */
		sp->seg_bytes_left -= fs->lfs_bsize;
		sp->sum_bytes_left -= sizeof(ufs_daddr_t);
		/*
		 * Inode block addresses are stored from the end of the
		 * summary block backwards, one slot per inode block.
		 */
		ndx = LFS_SUMMARY_SIZE / sizeof(ufs_daddr_t) -
		    sp->ninodes / INOPB(fs) - 1;
		((ufs_daddr_t *)(sp->segsum))[ndx] = daddr;
	}

	/* Update the inode times and copy the inode onto the inode page. */
	if (ip->i_flag & IN_MODIFIED)
		--fs->lfs_uinodes;
	ITIMES(ip, &time, &time);
	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
	bp = sp->ibp;
	((struct dinode *)bp->b_data)[sp->ninodes % INOPB(fs)] = ip->i_din;
	/* Increment inode count in segment summary block. */
	++((SEGSUM *)(sp->segsum))->ss_ninos;

	/* If this page is full, set flag to allocate a new page. */
	if (++sp->ninodes % INOPB(fs) == 0)
		sp->ibp = NULL;

	/*
	 * If updating the ifile, update the super-block.  Update the disk
	 * address and access times for this inode in the ifile.
	 * In both cases daddr ends up holding the inode's previous address.
	 */
	ino = ip->i_number;
	if (ino == LFS_IFILE_INUM) {
		daddr = fs->lfs_idaddr;
		fs->lfs_idaddr = bp->b_blkno;
	} else {
		LFS_IENTRY(ifp, fs, ino, ibp);
		daddr = ifp->if_daddr;
		ifp->if_daddr = bp->b_blkno;
		error = VOP_BWRITE(ibp);
	}

	/*
	 * No need to update segment usage if there was no former inode address
	 * or if the last inode address is in the current partial segment.
	 * Note that bp is the inode block in the range test below and is then
	 * handed to LFS_SEGENTRY, after which it refers to the segment-usage
	 * buffer.
	 */
	if (daddr != LFS_UNUSED_DADDR &&
	    !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
		if (sup->su_nbytes < sizeof(struct dinode)) {
			/* XXX -- Change to a panic. */
			printf("lfs: negative bytes (segment %d)\n",
			    datosn(fs, daddr));
			panic("negative bytes");
		}
#endif
		sup->su_nbytes -= sizeof(struct dinode);
		redo_ifile =
		    (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
		error = VOP_BWRITE(bp);
	}
	return (redo_ifile);
}

/*
 * Try to add buffer bp to the current partial segment.
 *
 * If the summary or the segment has no room, the metadata for the blocks
 * gathered so far is updated, the partial segment is written, a FINFO for
 * the same file is started in the new summary, and 1 is returned WITHOUT
 * adding bp (the caller rescans).  Otherwise bp is marked B_GATHERED,
 * appended to the buffer list and the FINFO block list, and 0 is returned.
 *
 * sptr, if non-NULL, points at the caller's saved spl level; it is
 * restored before the write and re-raised with splbio() afterwards.
 */
int
lfs_gatherblock(sp, bp, sptr)
	struct segment *sp;
	struct buf *bp;
	int *sptr;
{
	struct lfs *fs;
	int version;

	/*
	 * If full, finish this segment.  We may be doing I/O, so
	 * release and reacquire the splbio().
	 */
#ifdef DIAGNOSTIC
	if (sp->vp == NULL)
		panic ("lfs_gatherblock: Null vp in segment");
#endif
	fs = sp->fs;
	if (sp->sum_bytes_left < sizeof(ufs_daddr_t) ||
	    sp->seg_bytes_left < bp->b_bcount) {
		if (sptr)
			splx(*sptr);
		lfs_updatemeta(sp);

		/* Save the version so it can be copied into the new FINFO. */
		version = sp->fip->fi_version;
		(void) lfs_writeseg(fs, sp);

		sp->fip->fi_version = version;
		sp->fip->fi_ino = VTOI(sp->vp)->i_number;
		/* Add the current file to the segment summary. */
		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
		sp->sum_bytes_left -=
		    sizeof(struct finfo) - sizeof(ufs_daddr_t);

		if (sptr)
			*sptr = splbio();
		return(1);
	}

	/* Insert into the buffer list, update the FINFO block. */
	bp->b_flags |= B_GATHERED;
	*sp->cbpp++ = bp;
	sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;

	sp->sum_bytes_left -= sizeof(ufs_daddr_t);
	sp->seg_bytes_left -= bp->b_bcount;
	return(0);
}

/*
 * Gather every dirty buffer of vp that the predicate match() accepts into
 * the partial segment, then update the metadata that points to them.
 * Buffers that are B_BUSY or already B_GATHERED are skipped.  Whenever
 * lfs_gatherblock() returns non-zero the scan of the dirty list is
 * restarted.  The scan runs at splbio().
 */
void
lfs_gather(fs, sp, vp, match)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	int (*match) __P((struct lfs *, struct buf *));
{
	struct buf *bp;
	int s;

	sp->vp = vp;
	s = splbio();
/* This is a hack to see if ordering the blocks in LFS makes a difference. */
/*
 * BEGIN HACK
 * Walk the dirty list from its last buffer back to the first, using the
 * same le_prev/offset arithmetic as the vnode-list hack in lfs_writevnodes.
 * BEG_OF_LIST is the pseudo-buffer address derived from the list head and
 * terminates the walk.
 */
#define	BUF_OFFSET	(((void *)&bp->b_vnbufs.le_next) - (void *)bp)
#define	BACK_BUF(BP)	((struct buf *)(((void *)BP->b_vnbufs.le_prev) - BUF_OFFSET))
#define	BEG_OF_LIST	((struct buf *)(((void *)&vp->v_dirtyblkhd.lh_first) - BUF_OFFSET))


/*loop:	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {*/
/* Find last buffer. */
loop:	for (bp = vp->v_dirtyblkhd.lh_first; bp && bp->b_vnbufs.le_next != NULL;
	    bp = bp->b_vnbufs.le_next);
	for (; bp && bp != BEG_OF_LIST; bp = BACK_BUF(bp)) {
/* END HACK */
		if (bp->b_flags & B_BUSY || !match(fs, bp) ||
		    bp->b_flags & B_GATHERED)
			continue;
#ifdef DIAGNOSTIC
		if (!(bp->b_flags & B_DELWRI))
			panic("lfs_gather: bp not B_DELWRI");
		if (!(bp->b_flags & B_LOCKED))
			panic("lfs_gather: bp not B_LOCKED");
#endif
		/* Segment was written; bp was not added, so rescan. */
		if (lfs_gatherblock(sp, bp, &s))
			goto loop;
	}
	splx(s);
	lfs_updatemeta(sp);
	sp->vp = NULL;
}


/*
 * Update the metadata that points to the blocks listed in the FINFO
 * array.
 *
 * Handles the blocks between sp->start_lbp and the end of the current
 * FINFO block list: sorts them (unless SEGM_CLEAN is set), assigns each a
 * disk address from fs->lfs_offset, stores that address in the inode or
 * in the indirect block that references it, and subtracts the block's
 * size from the usage count of the segment that held the old copy.
 * sp->start_bpp and sp->start_lbp are advanced past the handled blocks.
 */
void
lfs_updatemeta(sp)
	struct segment *sp;
{
	SEGUSE *sup;
	struct buf *bp;
	struct lfs *fs;
	struct vnode *vp;
	struct indir a[NIADDR + 2], *ap;
	struct inode *ip;
	ufs_daddr_t daddr, lbn, off;
	int error, i, nblocks, num;

	vp = sp->vp;
	/* Number of block-list entries not yet processed. */
	nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
	if (nblocks < 0)
		panic("This is a bad thing\n");
	if (vp == NULL || nblocks == 0)
		return;

	/* Sort the blocks. */
	if (!(sp->seg_flags & SEGM_CLEAN))
		lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);

	/*
	 * Record the length of the last block in case it's a fragment.
	 * If there are indirect blocks present, they sort last.  An
	 * indirect block will be lfs_bsize and its presence indicates
	 * that you cannot have fragments.
	 */
	sp->fip->fi_lastlength = sp->start_bpp[nblocks - 1]->b_bcount;

	/*
	 * Assign disk addresses, and update references to the logical
	 * block and the segment usage information.
	 */
	fs = sp->fs;
	for (i = nblocks; i--; ++sp->start_bpp) {
		lbn = *sp->start_lbp++;
		(*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
		fs->lfs_offset +=
		    fragstodb(fs, numfrags(fs, (*sp->start_bpp)->b_bcount));

		/*
		 * daddr receives the block's previous disk address; a/num
		 * describe the chain of indirect blocks leading to it.
		 */
		if (error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL))
			panic("lfs_updatemeta: ufs_bmaparray %d", error);
		ip = VTOI(vp);
		switch (num) {
		case 0:
			/* Direct block: pointer lives in the inode. */
			ip->i_db[lbn] = off;
			break;
		case 1:
			/* Top-level indirect block: pointer lives in the inode. */
			ip->i_ib[a[0].in_off] = off;
			break;
		default:
			/* Pointer lives in the indirect block a[num - 1]. */
			ap = &a[num - 1];
			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
				panic("lfs_updatemeta: bread bno %d",
				    ap->in_lbn);
			/*
			 * Bread may create a new indirect block which needs
			 * to get counted for the inode.
			 */
			if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) {
				ip->i_blocks += fsbtodb(fs, 1);
				fs->lfs_bfree -= fragstodb(fs, fs->lfs_frag);
			}
			((ufs_daddr_t *)bp->b_data)[ap->in_off] = off;
			VOP_BWRITE(bp);
		}

		/*
		 * Update segment usage information, unless the block had no
		 * previous address or its previous copy lies within the
		 * current partial segment.  bp is reused here for the
		 * segment-usage buffer.
		 */
		if (daddr != UNASSIGNED &&
		    !(daddr >= fs->lfs_lastpseg && daddr <= off)) {
			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
			if (sup->su_nbytes < (*sp->start_bpp)->b_bcount) {
				/* XXX -- Change to a panic. */
				printf("lfs: negative bytes (segment %d)\n",
				    datosn(fs, daddr));
				printf("lfs: bp = 0x%x, addr = 0x%x\n",
				    bp, bp->b_un.b_addr);
				panic ("Negative Bytes");
			}
#endif
			sup->su_nbytes -= (*sp->start_bpp)->b_bcount;
			error = VOP_BWRITE(bp);
		}
	}
}

/*
 * Start a new segment.
 */
/*
 * lfs_initseg: set up fs->lfs_sp for the next partial segment.  If another
 * partial segment does not fit in the current segment (LFS_PARTIAL_FITS),
 * advance to a new segment with lfs_newseg().  Then allocate and zero a
 * summary block, make it the first entry in the buffer list, and reset the
 * FINFO and space counters.
 *
 * Returns 1 if a new segment was started, 0 otherwise.
 */
int
lfs_initseg(fs)
	struct lfs *fs;
{
	struct segment *sp;
	SEGUSE *sup;
	SEGSUM *ssp;
	struct buf *bp;
	int repeat;

	sp = fs->lfs_sp;

	repeat = 0;
	/* Advance to the next segment. */
	if (!LFS_PARTIAL_FITS(fs)) {
		/* Wake up any cleaning procs waiting on this file system. */
		wakeup(&lfs_allclean_wakeup);
		wakeup(&fs->lfs_nextseg);

		lfs_newseg(fs);
		repeat = 1;
		fs->lfs_offset = fs->lfs_curseg;
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;

		/*
		 * If the segment contains a superblock, update the offset
		 * and summary address to skip over it.
		 */
		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
			sp->seg_bytes_left -= LFS_SBPAD;
		}
		brelse(bp);
	} else {
		/* Stay in the current segment; compute what is left of it. */
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = (fs->lfs_dbpseg -
		    (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
	}
	/* Remember where this partial segment starts. */
	fs->lfs_lastpseg = fs->lfs_offset;

	sp->fs = fs;
	sp->ibp = NULL;
	sp->ninodes = 0;

	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
	sp->cbpp = sp->bpp;
	*sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset,
	     LFS_SUMMARY_SIZE);
	sp->segsum = (*sp->cbpp)->b_data;
	bzero(sp->segsum, LFS_SUMMARY_SIZE);
	sp->start_bpp = ++sp->cbpp;
	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;

	/* Set pointer to SEGSUM, initialize it. */
	ssp = sp->segsum;
	ssp->ss_next = fs->lfs_nextseg;
	ssp->ss_nfinfo = ssp->ss_ninos = 0;
	ssp->ss_magic = SS_MAGIC;

	/* Set pointer to first FINFO, initialize it. */
	sp->fip = (struct finfo *)((caddr_t)sp->segsum + sizeof(SEGSUM));
	sp->fip->fi_nblocks = 0;
	sp->start_lbp = &sp->fip->fi_blocks[0];
	sp->fip->fi_lastlength = 0;

	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
	sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);

	return(repeat);
}

/*
 * Advance to the next segment to write.
 *
 * The segment at fs->lfs_nextseg is marked dirty and active with its
 * usage counters zeroed, the cleaner info clean/dirty counts are adjusted,
 * and that segment becomes fs->lfs_curseg.  The segment table is then
 * scanned circularly for the next segment not marked SEGUSE_DIRTY, which
 * becomes the new fs->lfs_nextseg.  Panics if none is found.
 */
void
lfs_newseg(fs)
	struct lfs *fs;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	int curseg, isdirty, sn;

	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
	sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
	sup->su_nbytes = 0;
	sup->su_nsums = 0;
	sup->su_ninos = 0;
	(void) VOP_BWRITE(bp);

	LFS_CLEANERINFO(cip, fs, bp);
	--cip->clean;
	++cip->dirty;
	(void) VOP_BWRITE(bp);

	fs->lfs_lastseg = fs->lfs_curseg;
	fs->lfs_curseg = fs->lfs_nextseg;
	/* Circular scan, starting just after the new current segment. */
	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
		sn = (sn + 1) % fs->lfs_nseg;
		if (sn == curseg)
			panic("lfs_nextseg: no clean segments");
		LFS_SEGENTRY(sup, fs, sn, bp);
		isdirty = sup->su_flags & SEGUSE_DIRTY;
		brelse(bp);
		if (!isdirty)
			break;
	}

	++fs->lfs_nactive;
	fs->lfs_nextseg = sntoda(fs, sn);
#ifdef DOSTATS
	++lfs_stats.segsused;
#endif
}

/*
 * Write the current partial segment to disk.
 *
 * Updates the segment-usage entry, checksums the data and the summary,
 * copies the gathered buffers into chunks of at most MAXPHYS bytes and
 * hands each chunk to the device strategy routine, then sets up the next
 * partial segment with lfs_initseg().
 *
 * Returns 0 immediately if only the summary block is in the buffer list.
 * Otherwise returns non-zero if lfs_initseg() started a new segment or if
 * the segment-usage buffer modified here was not marked B_GATHERED.
 */
int
lfs_writeseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	extern int locked_queue_count;
	struct buf **bpp, *bp, *cbp;
	SEGUSE *sup;
	SEGSUM *ssp;
	dev_t i_dev;
	u_long *datap, *dp;
	int do_again, i, nblocks, s;
	int (*strategy)__P((struct vop_strategy_args *));
	struct vop_strategy_args vop_strategy_a;
	u_short ninos;
	char *p;

	/*
	 * If there are no buffers other than the segment summary to write
	 * and it is not a checkpoint, don't do anything.  On a checkpoint,
	 * even if there aren't any buffers, you need to write the superblock.
	 * NOTE(review): the test below does not look at the checkpoint flag;
	 * it returns whenever only the summary block is present.
	 */
	if ((nblocks = sp->cbpp - sp->bpp) == 1)
		return (0);

	/* Update the segment usage information. */
	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);

	/* Loop through all blocks, except the segment summary. */
	for (bpp = sp->bpp; ++bpp < sp->cbpp; )
		sup->su_nbytes += (*bpp)->b_bcount;

	ssp = (SEGSUM *)sp->segsum;

	/* ninos is the number of inode blocks, rounded up. */
	ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
	sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
	sup->su_nbytes += LFS_SUMMARY_SIZE;
	sup->su_lastmod = time.tv_sec;
	sup->su_ninos += ninos;
	++sup->su_nsums;
	/* Must be sampled before VOP_BWRITE() hands the buffer back. */
	do_again = !(bp->b_flags & B_GATHERED);
	(void)VOP_BWRITE(bp);
	/*
	 * Compute checksum across data and then across summary; the first
	 * block (the summary block) is skipped.  Set the create time here
	 * so that it's guaranteed to be later than the inode mod times.
	 * Only the first u_long of each block goes into the data checksum.
	 *
	 * XXX
	 * Fix this to do it inline, instead of malloc/copy.
	 */
	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
	for (bpp = sp->bpp, i = nblocks - 1; i--;) {
		if ((*++bpp)->b_flags & B_INVAL) {
			if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long)))
				panic("lfs_writeseg: copyin failed");
		} else
			*dp++ = ((u_long *)(*bpp)->b_data)[0];
	}
	ssp->ss_create = time.tv_sec;
	ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
	ssp->ss_sumsum =
	    cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
	free(datap, M_SEGMENT);
#ifdef DIAGNOSTIC
	if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE)
		panic("lfs_writeseg: No diskspace for summary");
#endif
	/* Charge the inode blocks and the summary block against free space. */
	fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/*
	 * When we simply write the blocks we lose a rotation for every block
	 * written.  To avoid this problem, we allocate memory in chunks, copy
	 * the buffers into the chunk and write the chunk.  MAXPHYS is the
	 * largest size I/O devices can handle.
	 * When the data is copied to the chunk, turn off the B_LOCKED bit
	 * and brelse the buffer (which will move them to the LRU list).  Add
	 * the B_CALL flag to the buffer header so we can count I/O's for the
	 * checkpoints and so we can release the allocated memory.
	 *
	 * XXX
	 * This should be removed if the new virtual memory system allows us to
	 * easily make the buffers contiguous in kernel memory and if that's
	 * fast enough.
	 */
	for (bpp = sp->bpp, i = nblocks; i;) {
		cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
		    (*bpp)->b_blkno, MAXPHYS);
		cbp->b_dev = i_dev;
		cbp->b_flags |= B_ASYNC | B_BUSY;
		cbp->b_bcount = 0;

		s = splbio();
		++fs->lfs_iocount;
		for (p = cbp->b_data; i && cbp->b_bcount < MAXPHYS; i--) {
			bp = *bpp;
			/* Next buffer does not fit; it starts the next chunk. */
			if (bp->b_bcount > (MAXPHYS - cbp->b_bcount))
				break;
			bpp++;

			/*
			 * Fake buffers from the cleaner are marked as B_INVAL.
			 * We need to copy the data from user space rather than
			 * from the buffer indicated.
			 * XXX == what do I do on an error?
			 */
			if (bp->b_flags & B_INVAL) {
				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
					panic("lfs_writeseg: copyin failed");
			} else
				bcopy(bp->b_data, p, bp->b_bcount);
			p += bp->b_bcount;
			cbp->b_bcount += bp->b_bcount;
			if (bp->b_flags & B_LOCKED)
				--locked_queue_count;
			bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
			     B_LOCKED | B_GATHERED);
			if (bp->b_flags & B_CALL) {
				/* if B_CALL, it was created with newbuf */
				brelvp(bp);
				if (!(bp->b_flags & B_INVAL))
					free(bp->b_data, M_SEGMENT);
				free(bp, M_SEGMENT);
			} else {
				bremfree(bp);
				bp->b_flags |= B_DONE;
				reassignbuf(bp, bp->b_vp);
				brelse(bp);
			}
		}
		++cbp->b_vp->v_numoutput;
		splx(s);
		/*
		 * XXXX This is a gross and disgusting hack.  Since these
		 * buffers are physically addressed, they hang off the
		 * device vnode (devvp).  As a result, they have no way
		 * of getting to the LFS superblock or lfs structure to
		 * keep track of the number of I/O's pending.  So, I am
		 * going to stuff the fs into the saveaddr field of
		 * the buffer (yuk).
		 */
		cbp->b_saveaddr = (caddr_t)fs;
		vop_strategy_a.a_desc = VDESC(vop_strategy);
		vop_strategy_a.a_bp = cbp;
		(strategy)(&vop_strategy_a);
	}
	/*
	 * XXX
	 * Vinvalbuf can move locked buffers off the locked queue
	 * and we have no way of knowing about this.  So, after
	 * doing a big write, we recalculate how many buffers are
	 * really still left on the locked queue.
	 */
	locked_queue_count = count_lock_queue();
	wakeup(&locked_queue_count);
#ifdef DOSTATS
	++lfs_stats.psegwrites;
	lfs_stats.blocktot += nblocks - 1;
	if (fs->lfs_sp->seg_flags & SEGM_SYNC)
		++lfs_stats.psyncwrites;
	if (fs->lfs_sp->seg_flags & SEGM_CLEAN) {
		++lfs_stats.pcleanwrites;
		lfs_stats.cleanblocks += nblocks - 1;
	}
#endif
	return (lfs_initseg(fs) || do_again);
}

/*
 * Checksum the in-core superblock, copy it into a freshly allocated
 * LFS_SBPAD-sized buffer addressed at fs->lfs_sboffs[0], and start an
 * asynchronous write through the device strategy routine.  The buffer is
 * released by lfs_supercallback() when the I/O completes.
 */
void
lfs_writesuper(fs)
	struct lfs *fs;
{
	struct buf *bp;
	dev_t i_dev;
	int (*strategy) __P((struct vop_strategy_args *));
	int s;
	struct vop_strategy_args vop_strategy_a;

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/* Checksum the superblock and copy it into a buffer. */
	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
	bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0],
	    LFS_SBPAD);
	*(struct lfs *)bp->b_data = *fs;

	/* XXX Toggle between first two superblocks; for now just write first */
	bp->b_dev = i_dev;
	bp->b_flags |= B_BUSY | B_CALL | B_ASYNC;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	/* Replace the lfs_callback completion handler installed by lfs_newbuf(). */
	bp->b_iodone = lfs_supercallback;
	vop_strategy_a.a_desc = VDESC(vop_strategy);
	vop_strategy_a.a_bp = bp;
	s = splbio();
	++bp->b_vp->v_numoutput;
	splx(s);
	(strategy)(&vop_strategy_a);
}

/*
 * Logical block number match routines used when traversing the dirty block
 * chain.
964 */ 965 int 966 lfs_match_data(fs, bp) 967 struct lfs *fs; 968 struct buf *bp; 969 { 970 return (bp->b_lblkno >= 0); 971 } 972 973 int 974 lfs_match_indir(fs, bp) 975 struct lfs *fs; 976 struct buf *bp; 977 { 978 int lbn; 979 980 lbn = bp->b_lblkno; 981 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 982 } 983 984 int 985 lfs_match_dindir(fs, bp) 986 struct lfs *fs; 987 struct buf *bp; 988 { 989 int lbn; 990 991 lbn = bp->b_lblkno; 992 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 993 } 994 995 int 996 lfs_match_tindir(fs, bp) 997 struct lfs *fs; 998 struct buf *bp; 999 { 1000 int lbn; 1001 1002 lbn = bp->b_lblkno; 1003 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 1004 } 1005 1006 /* 1007 * Allocate a new buffer header. 1008 */ 1009 struct buf * 1010 lfs_newbuf(vp, daddr, size) 1011 struct vnode *vp; 1012 ufs_daddr_t daddr; 1013 size_t size; 1014 { 1015 struct buf *bp; 1016 size_t nbytes; 1017 1018 nbytes = roundup(size, DEV_BSIZE); 1019 bp = malloc(sizeof(struct buf), M_SEGMENT, M_WAITOK); 1020 bzero(bp, sizeof(struct buf)); 1021 if (nbytes) 1022 bp->b_data = malloc(nbytes, M_SEGMENT, M_WAITOK); 1023 bgetvp(vp, bp); 1024 bp->b_bufsize = size; 1025 bp->b_bcount = size; 1026 bp->b_lblkno = daddr; 1027 bp->b_blkno = daddr; 1028 bp->b_error = 0; 1029 bp->b_resid = 0; 1030 bp->b_iodone = lfs_callback; 1031 bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE; 1032 return (bp); 1033 } 1034 1035 void 1036 lfs_callback(bp) 1037 struct buf *bp; 1038 { 1039 struct lfs *fs; 1040 1041 fs = (struct lfs *)bp->b_saveaddr; 1042 #ifdef DIAGNOSTIC 1043 if (fs->lfs_iocount == 0) 1044 panic("lfs_callback: zero iocount\n"); 1045 #endif 1046 if (--fs->lfs_iocount == 0) 1047 wakeup(&fs->lfs_iocount); 1048 1049 brelvp(bp); 1050 free(bp->b_data, M_SEGMENT); 1051 free(bp, M_SEGMENT); 1052 } 1053 1054 void 1055 lfs_supercallback(bp) 1056 struct buf *bp; 1057 { 1058 brelvp(bp); 1059 free(bp->b_data, M_SEGMENT); 1060 free(bp, M_SEGMENT); 1061 } 1062 1063 /* 1064 * Shellsort 
(diminishing increment sort) from Data Structures and 1065 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; 1066 * see also Knuth Vol. 3, page 84. The increments are selected from 1067 * formula (8), page 95. Roughly O(N^3/2). 1068 */ 1069 /* 1070 * This is our own private copy of shellsort because we want to sort 1071 * two parallel arrays (the array of buffer pointers and the array of 1072 * logical block numbers) simultaneously. Note that we cast the array 1073 * of logical block numbers to a unsigned in this routine so that the 1074 * negative block numbers (meta data blocks) sort AFTER the data blocks. 1075 */ 1076 void 1077 lfs_shellsort(bp_array, lb_array, nmemb) 1078 struct buf **bp_array; 1079 ufs_daddr_t *lb_array; 1080 register int nmemb; 1081 { 1082 static int __rsshell_increments[] = { 4, 1, 0 }; 1083 register int incr, *incrp, t1, t2; 1084 struct buf *bp_temp; 1085 u_long lb_temp; 1086 1087 for (incrp = __rsshell_increments; incr = *incrp++;) 1088 for (t1 = incr; t1 < nmemb; ++t1) 1089 for (t2 = t1 - incr; t2 >= 0;) 1090 if (lb_array[t2] > lb_array[t2 + incr]) { 1091 lb_temp = lb_array[t2]; 1092 lb_array[t2] = lb_array[t2 + incr]; 1093 lb_array[t2 + incr] = lb_temp; 1094 bp_temp = bp_array[t2]; 1095 bp_array[t2] = bp_array[t2 + incr]; 1096 bp_array[t2 + incr] = bp_temp; 1097 t2 -= incr; 1098 } else 1099 break; 1100 } 1101 1102 /* 1103 * Check VXLOCK. Return 1 if the vnode is locked. Otherwise, vget it. 1104 */ 1105 lfs_vref(vp) 1106 register struct vnode *vp; 1107 { 1108 struct proc *p = curproc; /* XXX */ 1109 1110 if (vp->v_flag & VXLOCK) /* XXX */ 1111 return(1); 1112 return (vget(vp, 0, p)); 1113 } 1114 1115 /* 1116 * This is vrele except that we do not want to VOP_INACTIVE this vnode. We 1117 * inline vrele here to avoid the vn_lock and VOP_INACTIVE call at the end. 
1118 */ 1119 void 1120 lfs_vunref(vp) 1121 register struct vnode *vp; 1122 { 1123 struct proc *p = curproc; /* XXX */ 1124 extern struct simplelock vnode_free_list_slock; /* XXX */ 1125 extern TAILQ_HEAD(freelst, vnode) vnode_free_list; /* XXX */ 1126 1127 simple_lock(&vp->v_interlock); 1128 vp->v_usecount--; 1129 if (vp->v_usecount > 0) { 1130 simple_unlock(&vp->v_interlock); 1131 return; 1132 } 1133 /* 1134 * insert at tail of LRU list 1135 */ 1136 simple_lock(&vnode_free_list_slock); 1137 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1138 simple_unlock(&vnode_free_list_slock); 1139 simple_unlock(&vp->v_interlock); 1140 } 1141