1 /* vfs_bio.c 3.12 09/16/80 */ 2 3 #include "../h/param.h" 4 #include "../h/systm.h" 5 #include "../h/dir.h" 6 #include "../h/user.h" 7 #include "../h/buf.h" 8 #include "../h/conf.h" 9 #include "../h/proc.h" 10 #include "../h/seg.h" 11 #include "../h/pte.h" 12 #include "../h/vm.h" 13 14 /* 15 * The following several routines allocate and free 16 * buffers with various side effects. In general the 17 * arguments to an allocate routine are a device and 18 * a block number, and the value is a pointer to 19 * to the buffer header; the buffer is marked "busy" 20 * so that no one else can touch it. If the block was 21 * already in core, no I/O need be done; if it is 22 * already busy, the process waits until it becomes free. 23 * The following routines allocate a buffer: 24 * getblk 25 * bread 26 * breada 27 * baddr (if it is incore) 28 * Eventually the buffer must be released, possibly with the 29 * side effect of writing it out, by using one of 30 * bwrite 31 * bdwrite 32 * bawrite 33 * brelse 34 */ 35 36 #define BUFHSZ 63 37 #define BUFHASH(blkno) (blkno % BUFHSZ) 38 short bufhash[BUFHSZ]; 39 40 /* 41 * Initialize hash links for buffers. 42 */ 43 bhinit() 44 { 45 register int i; 46 47 for (i = 0; i < BUFHSZ; i++) 48 bufhash[i] = -1; 49 } 50 51 /* #define DISKMON 1 */ 52 53 #ifdef DISKMON 54 struct { 55 int nbuf; 56 long nread; 57 long nreada; 58 long ncache; 59 long nwrite; 60 long bufcount[NBUF]; 61 } io_info; 62 #endif 63 64 /* 65 * Swap IO headers - 66 * They contain the necessary information for the swap I/O. 67 * At any given time, a swap header can be in three 68 * different lists. When free it is in the free list, 69 * when allocated and the I/O queued, it is on the swap 70 * device list, and finally, if the operation was a dirty 71 * page push, when the I/O completes, it is inserted 72 * in a list of cleaned pages to be processed by the pageout daemon. 73 */ 74 struct buf swbuf[NSWBUF]; 75 short swsize[NSWBUF]; /* CAN WE JUST USE B_BCOUNT? 
*/ 76 int swpf[NSWBUF]; 77 78 79 #ifdef FASTVAX 80 #define notavail(bp) \ 81 { \ 82 int s = spl6(); \ 83 (bp)->av_back->av_forw = (bp)->av_forw; \ 84 (bp)->av_forw->av_back = (bp)->av_back; \ 85 (bp)->b_flags |= B_BUSY; \ 86 splx(s); \ 87 } 88 #endif 89 90 /* 91 * Read in (if necessary) the block and return a buffer pointer. 92 */ 93 struct buf * 94 bread(dev, blkno) 95 dev_t dev; 96 daddr_t blkno; 97 { 98 register struct buf *bp; 99 100 bp = getblk(dev, blkno); 101 if (bp->b_flags&B_DONE) { 102 #ifdef DISKMON 103 io_info.ncache++; 104 #endif 105 return(bp); 106 } 107 bp->b_flags |= B_READ; 108 bp->b_bcount = BSIZE; 109 (*bdevsw[major(dev)].d_strategy)(bp); 110 #ifdef DISKMON 111 io_info.nread++; 112 #endif 113 u.u_vm.vm_inblk++; /* pay for read */ 114 iowait(bp); 115 return(bp); 116 } 117 118 /* 119 * Read in the block, like bread, but also start I/O on the 120 * read-ahead block (which is not allocated to the caller) 121 */ 122 struct buf * 123 breada(dev, blkno, rablkno) 124 dev_t dev; 125 daddr_t blkno, rablkno; 126 { 127 register struct buf *bp, *rabp; 128 129 bp = NULL; 130 if (!incore(dev, blkno)) { 131 bp = getblk(dev, blkno); 132 if ((bp->b_flags&B_DONE) == 0) { 133 bp->b_flags |= B_READ; 134 bp->b_bcount = BSIZE; 135 (*bdevsw[major(dev)].d_strategy)(bp); 136 #ifdef DISKMON 137 io_info.nread++; 138 #endif 139 u.u_vm.vm_inblk++; /* pay for read */ 140 } 141 } 142 if (rablkno && !incore(dev, rablkno)) { 143 rabp = getblk(dev, rablkno); 144 if (rabp->b_flags & B_DONE) 145 brelse(rabp); 146 else { 147 rabp->b_flags |= B_READ|B_ASYNC; 148 rabp->b_bcount = BSIZE; 149 (*bdevsw[major(dev)].d_strategy)(rabp); 150 #ifdef DISKMON 151 io_info.nreada++; 152 #endif 153 u.u_vm.vm_inblk++; /* pay in advance */ 154 } 155 } 156 if(bp == NULL) 157 return(bread(dev, blkno)); 158 iowait(bp); 159 return(bp); 160 } 161 162 /* 163 * Write the buffer, waiting for completion. 164 * Then release the buffer. 
/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 * (If B_ASYNC was set by the caller, the write is started
 * but not waited for, and the buffer is released by iodone.)
 */
bwrite(bp)
register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;	/* remember caller's flags before clearing */
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	bp->b_bcount = BSIZE;
#ifdef DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* noone paid yet */
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		/*
		 * Asynchronous flush of a delayed write: age the buffer
		 * so it is reclaimed ahead of ordinary buffers.
		 */
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
register struct buf *bp;
{
	register struct buf *dp;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* noone paid yet */
	dp = bdevsw[major(bp->b_dev)].d_tab;
	if(dp->b_flags & B_TAPE)
		bawrite(bp);	/* tape: preserve write ordering, start now */
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * release the buffer, with no I/O implied.
 * The buffer is put back on the free list: at the head if it
 * is aged or errored (reuse soon), otherwise at the tail (LRU).
 * Sleepers on this buffer or on the free list are awakened.
 */
brelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist.b_flags&B_WANTED) {
		bfreelist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bfreelist);
	}
	if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) {
		bunhash(bp);	/* contents invalid; forget the association */
		bp->b_dev = NODEV;  /* no assoc. on error */
	}
	s = spl6();	/* block interrupts while on the free list links */
	if(bp->b_flags & (B_AGE|B_ERROR)) {
		/* insert at head of free list */
		backp = &bfreelist.av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = &bfreelist;
	} else {
		/* insert at tail of free list */
		backp = &bfreelist.av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = &bfreelist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada)
 * Returns 1 if the (dev, blkno) pair is cached, else 0.
 */
incore(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
	register int dblkno = fsbtodb(blkno);

	/* walk the hash chain; b_hlink of -1 (i.e. &buf[-1]) ends it */
	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink])
		if (bp->b_blkno == dblkno && bp->b_dev == dev)
			return (1);
	return (0);
}

/*
 * Return the buffer for the block if it is in core
 * (reading it is then free of I/O); otherwise return 0.
 */
struct buf *
baddr(dev, blkno)
dev_t dev;
daddr_t blkno;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno));
	return (0);
}
/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 * The returned buffer is marked B_BUSY.
 */
struct buf *
getblk(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp, *dp, *ep;
	register int i, x;
	register int dblkno = fsbtodb(blkno);

    loop:
	(void) spl0();
	/* first, look for the block on its hash chain */
	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink]) {
		if (bp->b_blkno != dblkno || bp->b_dev != dev)
			continue;
		(void) spl6();
		if (bp->b_flags&B_BUSY) {
			/* someone else has it; wait and rescan from scratch */
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			goto loop;
		}
		(void) spl0();
#ifdef	DISKMON
		/* record how deep into the free list the hit was */
		i = 0;
		dp = bp->av_forw;
		while (dp != &bfreelist) {
			i++;
			dp = dp->av_forw;
		}
		if (i<NBUF)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	/* cache miss: steal the oldest free buffer */
	if (major(dev) >= nblkdev)
		panic("blkdev");
	dp = bdevsw[major(dev)].d_tab;
	if (dp == NULL)
		panic("devtab");
	(void) spl6();
	if (bfreelist.av_forw == &bfreelist) {
		/* no free buffers; wait for brelse to give one back */
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
		goto loop;
	}
	spl0();
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		/* flush delayed-write contents first, then try again */
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev == NODEV)
		goto done;	/* not on any hash chain */
	/* INLINE EXPANSION OF bunhash(bp) */
	(void) spl6();
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;	/* buffer's own index, to match against links */
	if (bufhash[i] == x) {
		bufhash[i] = bp->b_hlink;
	} else {
		for (ep = &buf[bufhash[i]]; ep != &buf[-1];
		    ep = &buf[ep->b_hlink])
			if (ep->b_hlink == x) {
				ep->b_hlink = bp->b_hlink;
				goto done;
			}
		panic("getblk");	/* buffer not found on its chain */
	}
    done:
	(void) spl0();
	/* END INLINE EXPANSION */
	/* reassign the buffer: move onto dev's b_forw/b_back queue */
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = dblkno;
	/* link into the new block's hash chain, at the head */
	i = BUFHASH(blkno);
	bp->b_hlink = bufhash[i];
	bufhash[i] = bp - buf;
	return(bp);
}

/*
 * get an empty block,
 * not assigned to any particular device
 */
struct buf *
geteblk()
{
	register struct buf *bp, *dp;

    loop:
	(void) spl6();
	while (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
	}
	(void) spl0();
	dp = &bfreelist;	/* empty buffers hang off the free list head */
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		/* flush delayed-write contents first, then try again */
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev != NODEV)
		bunhash(bp);	/* drop any previous device association */
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	bp->b_hlink = -1;	/* on no hash chain */
	return(bp);
}

/*
 * Remove a buffer from its hash chain.
 * No-op if the buffer has no device association;
 * panics if the buffer is not found on its chain.
 */
bunhash(bp)
register struct buf *bp;
{
	register struct buf *ep;
	register int i, x, s;

	if (bp->b_dev == NODEV)
		return;
	s = spl6();	/* keep chain consistent across interrupts */
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;	/* buffer's own index, to match against links */
	if (bufhash[i] == x) {
		bufhash[i] = bp->b_hlink;
		goto ret;
	}
	for (ep = &buf[bufhash[i]]; ep != &buf[-1];
	    ep = &buf[ep->b_hlink])
		if (ep->b_hlink == x) {
			ep->b_hlink = bp->b_hlink;
			goto ret;
		}
	panic("bunhash");
    ret:
	splx(s);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
register struct buf *bp;
{

	(void) spl6();	/* iodone runs at interrupt level; avoid missed wakeup */
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	(void) spl0();
	geterror(bp);
}
#ifndef FASTVAX
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 * On FASTVAX machines this is a macro defined above.
 */
notavail(bp)
register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer. If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon. Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 * Called from device interrupt level.
 */
iodone(bp)
register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		/* dirty page push: hand the header to the pageout daemon */
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		cnt.v_pgout++;
		bp->av_forw = bclnlist;
		/* restore size/page-frame info stashed by swap() below */
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);	/* proc[2] is the pageout daemon */
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);	/* nobody is waiting; put it back ourselves */
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
struct buf *bp;
{
	register *p;
	register c;

	p = bp->b_un.b_words;
	c = BSIZE/sizeof(int);	/* clear a full block, one word at a time */
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion. When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 * Otherwise, the transfer is broken into chunks and each
 * chunk is waited for in turn.
 * NOTE(review): rdflg is undeclared and so defaults to int
 * (expected to be B_READ or B_WRITE) -- confirm against callers.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
struct proc *p;
swblk_t dblkno;
caddr_t addr;
int flag, nbytes;
dev_t dev;
unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;

	/* grab a free swap header, sleeping until one appears */
	(void) spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	(void) spl0();

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		/*
		 * Dirty page push: map the page(s) into the pageout
		 * daemon's (proc[2]) address space at a slot chosen by
		 * this header's index, and do the I/O from there.
		 */
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);	/* transfer chunk limit */
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			/* stash info for iodone() to restore on completion */
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			/* dirty push is asynchronous: fire and return */
			if (c < nbytes)
				panic("big push");
			return;
		}
		(void) spl6();
		while((bp->b_flags&B_DONE)==0)
			sleep((caddr_t)bp, PSWP);
		(void) spl0();
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);	/* soft error on read: kill proc */
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	/* done: clean the header and return it to the free list */
	(void) spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	(void) spl0();
}

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
struct proc *p;
char *rout;
{

	printf("%d: ", p->p_pid);
	if (rout)
		printf("out of swap space in %s\n", rout);
	else
		printf("killed on swap error\n");
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so noone will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
dev_t dev;
{
	register struct buf *bp;

    loop:
	(void) spl6();
	for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) {
		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			/* bwrite changed the free list; rescan from the top */
			goto loop;
		}
	}
	(void) spl0();
}
693 */ 694 physio(strat, bp, dev, rw, mincnt) 695 int (*strat)(); 696 register struct buf *bp; 697 unsigned (*mincnt)(); 698 { 699 register int c; 700 char *a; 701 702 if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) { 703 u.u_error = EFAULT; 704 return; 705 } 706 (void) spl6(); 707 while (bp->b_flags&B_BUSY) { 708 bp->b_flags |= B_WANTED; 709 sleep((caddr_t)bp, PRIBIO+1); 710 } 711 bp->b_error = 0; 712 bp->b_proc = u.u_procp; 713 bp->b_un.b_addr = u.u_base; 714 while (u.u_count != 0 && bp->b_error==0) { 715 bp->b_flags = B_BUSY | B_PHYS | rw; 716 bp->b_dev = dev; 717 bp->b_blkno = u.u_offset >> PGSHIFT; 718 bp->b_bcount = u.u_count; 719 (*mincnt)(bp); 720 c = bp->b_bcount; 721 u.u_procp->p_flag |= SPHYSIO; 722 vslock(a = bp->b_un.b_addr, c); 723 (*strat)(bp); 724 (void) spl6(); 725 while ((bp->b_flags&B_DONE) == 0) 726 sleep((caddr_t)bp, PRIBIO); 727 vsunlock(a, c, rw); 728 u.u_procp->p_flag &= ~SPHYSIO; 729 if (bp->b_flags&B_WANTED) 730 wakeup((caddr_t)bp); 731 (void) spl0(); 732 bp->b_un.b_addr += c; 733 u.u_count -= c; 734 u.u_offset += c; 735 } 736 bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS); 737 u.u_count = bp->b_resid; 738 geterror(bp); 739 } 740 741 /*ARGSUSED*/ 742 unsigned 743 minphys(bp) 744 struct buf *bp; 745 { 746 747 if (bp->b_bcount > 60 * 1024) 748 bp->b_bcount = 60 * 1024; 749 } 750 751 /* 752 * Pick up the device's error number and pass it to the user; 753 * if there is an error but the number is 0 set a generalized 754 * code. Actually the latter is always true because devices 755 * don't yet return specific errors. 756 */ 757 geterror(bp) 758 register struct buf *bp; 759 { 760 761 if (bp->b_flags&B_ERROR) 762 if ((u.u_error = bp->b_error)==0) 763 u.u_error = EIO; 764 } 765