/*	vfs_cluster.c	4.29	82/04/19	*/

/* merged into kernel:	@(#)bio.c	2.3	4/8/82 */

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */

struct	buf bfreelist[BQUEUES];
struct	buf bswlist, *bclnlist;

#define	BUFHSZ	63
#define	RND	(MAXBSIZE/DEV_BSIZE)
struct	bufhd bufhash[BUFHSZ];
#define	BUFHASH(dev, dblkno)	\
	((struct buf *)&bufhash[((int)(dev)+(((int)(dblkno))/RND)) % BUFHSZ])

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;
	register struct bufhd *bp;

	for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)
		bp->b_forw = bp->b_back = (struct buf *)bp;
}

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[64];
} io_info;
#endif

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be in three
 * different lists.  When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf *swbuf;
short	*swsize;		/* CAN WE JUST USE B_BCOUNT? */
int	*swpf;


#ifndef	UNFAST
#define	notavail(bp) \
{ \
	int x = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(x); \
}
#endif
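/*
 * Illustrative sketch (not part of the original source): a typical
 * consumer of the routines below reads a block, inspects it, and
 * releases it.  The block number and size used here are made up.
 */
#ifdef notdef
exampleread(dev)
	dev_t dev;
{
	register struct buf *bp;
	struct buf *bread();

	bp = bread(dev, (daddr_t)0, DEV_BSIZE);	/* block 0, one sector */
	if ((bp->b_flags & B_ERROR) == 0) {
		/* ... examine the data at bp->b_un.b_addr ... */
	}
	brelse(bp);				/* back on the free list */
}
#endif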
/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
#ifdef	TRACE
		trace(TR_BREADHIT, dev, blkno);
#endif
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
	trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno, size)
	dev_t dev;
	daddr_t blkno, rablkno;
	int size;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
			trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;	/* pay for read */
		}
#ifdef	TRACE
		else
			trace(TR_BREADHIT, dev, blkno);
#endif
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, size);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
#ifdef	TRACE
			trace(TR_BREADHITRA, dev, blkno);
#endif
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	TRACE
			trace(TR_BREADMISSRA, dev, rablkno);
#endif
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;	/* pay in advance */
		}
	}
	if (bp == NULL)
		return (bread(dev, blkno, size));
	iowait(bp);
	return(bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
#ifdef	TRACE
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
#endif
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}
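/*
 * Illustrative sketch (not part of the original source): choosing among
 * the three write flavors above.  A caller writing a partial block it
 * expects to touch again soon uses bdwrite; a block it is finished with
 * but need not wait for uses bawrite; a block that must be on disk
 * before proceeding uses bwrite.
 */
#ifdef notdef
examplewrite(bp, partial, critical)
	register struct buf *bp;
	int partial, critical;
{

	if (critical)
		bwrite(bp);	/* write now, wait for completion */
	else if (partial)
		bdwrite(bp);	/* mark dirty; written when reclaimed */
	else
		bawrite(bp);	/* start the write, don't wait */
}
#endif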
/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */
	s = spl6();
	if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQUEUES-1];
		flist->av_forw->av_back = bp;
		bp->av_forw = flist->av_forw;
		flist->av_forw = bp;
		bp->av_back = flist;
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		flist->av_back->av_forw = bp;
		bp->av_back = flist->av_back;
		flist->av_back = bp;
		bp->av_forw = flist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    !(bp->b_flags & B_INVAL))
			return (1);
	return (0);
}

/*
 * Return the block if it is in core, otherwise 0;
 * unlike bread, no I/O is started for a missing block.
 */
struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}
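/*
 * Illustrative sketch (not part of the original source): baddr lets a
 * caller take advantage of a cached block without paying for a read
 * when the block is absent.
 */
#ifdef notdef
examplepeek(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;
	struct buf *baddr();

	if ((bp = baddr(dev, blkno, size)) != 0) {
		/* ... use the cached copy ... */
		brelse(bp);
	}
	/* otherwise proceed without it; no I/O was started */
}
#endif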
/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp, *ep;
#ifdef	DISKMON
	register int i;
#endif
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
#ifdef	DISKMON
		i = 0;
		dp = bp->av_forw;
		while ((dp->b_flags & B_HEAD) == 0) {
			i++;
			dp = dp->av_forw;
		}
		if (i<64)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		brealloc(bp, size);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	s = spl6();
	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
		if (ep->av_forw != ep)
			break;
	if (ep == bfreelist) {		/* no free blocks at all */
		ep->b_flags |= B_WANTED;
		sleep((caddr_t)ep, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = ep->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	TRACE
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
	bp->b_flags = B_BUSY;
	bfree(bp);
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	brealloc(bp, size);
	return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	TRACE
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
	bp->b_flags = B_BUSY|B_INVAL;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	bp->b_bcount = size;
	return(bp);
}
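/*
 * Illustrative sketch (not part of the original source): geteblk
 * supplies a scratch buffer with no device identity, e.g. for building
 * a block of output to hand directly to a driver.
 */
#ifdef notdef
examplescratch()
{
	register struct buf *bp;
	struct buf *geteblk();

	bp = geteblk(DEV_BSIZE);	/* comes back B_BUSY|B_INVAL, dev NODEV */
	clrbuf(bp);			/* zero it before use */
	/* ... fill in the data at bp->b_un.b_addr ... */
	brelse(bp);			/* B_INVAL puts it at the front of the emptiest queue */
}
#endif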
/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First need to make sure that all overlapping previous I/O
	 * is dispensed with.
	 */
	if (size == bp->b_bcount)
		return;
	if (size < bp->b_bcount) {
		bp->b_bcount = size;
		return;
	}
	start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE);
	last = bp->b_blkno + (size / DEV_BSIZE) - 1;
	if (bp->b_bcount == 0) {
		start++;
		if (start == last)
			goto allocit;
	}
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	(void) spl0();
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_blkno < start || ep->b_blkno > last ||
		    ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		(void) spl0();
		/*
		 * What we would really like to do is kill this
		 * I/O since it is now useless.  We cannot do that
		 * so we force it to complete, so that it cannot
		 * over-write our useful data later.
		 */
		if (ep->b_flags & B_DELWRI) {
			notavail(ep);
			ep->b_flags |= B_ASYNC;
			bwrite(ep);
			goto loop;
		}
	}
allocit:
	/*
	 * Here the buffer is already available, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = size;
}

/*
 * Release space associated with a buffer.
 */
bfree(bp)
	struct buf *bp;
{
	/*
	 * Here the buffer does not change, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = 0;
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	geterror(bp);
}

#ifdef	UNFAST
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
	register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
	register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		cnt.v_pgout++;
		cnt.v_pgpgout += bp->b_bcount / NBPG;
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
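/*
 * Illustrative sketch (not part of the original source): iodone is the
 * driver's half of the handshake.  xxintr and xxtab below are
 * hypothetical; a controller interrupt routine finishes the transfer
 * at the head of its queue and posts completion.
 */
#ifdef notdef
struct	buf xxtab;		/* hypothetical controller queue head */

xxintr()
{
	register struct buf *bp;

	bp = xxtab.av_forw;		/* transfer at the head just completed */
	xxtab.av_forw = bp->av_forw;	/* dequeue it */
	bp->b_resid = 0;		/* everything transferred */
	iodone(bp);			/* wakes iowait, or brelse's if B_ASYNC */
}
#endif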
/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
	struct buf *bp;
{
	register int *p;
	register int c;

	p = bp->b_un.b_words;
	c = bp->b_bcount/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i'th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion.  When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int rdflg, flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;
	int s;

	s = spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	splx(s);

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
#ifdef	TRACE
		trace(TR_SWAPIO, dev, bp->b_blkno);
#endif
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return;
		}
		s = spl6();
		while ((bp->b_flags&B_DONE)==0)
			sleep((caddr_t)bp, PSWP);
		splx(s);
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	s = spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	splx(s);
}
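/*
 * Illustrative sketch (not part of the original source): a synchronous
 * pagein through swap().  The caller and arguments here are made up;
 * real callers live in the VM system.
 */
#ifdef notdef
examplepagein(p, vaddr, swblk, swapdev)
	struct proc *p;
	caddr_t vaddr;
	swblk_t swblk;
	dev_t swapdev;
{

	/* read one page from the swap area into the process image */
	swap(p, swblk, vaddr, NBPG, B_READ, B_PGIN, swapdev, 0);
}
#endif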
/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{
	char *mesg;

	printf("pid %d: ", p->p_pid);
	if (rout)
		printf(mesg = "killed due to no swap space\n");
	else
		printf(mesg = "killed on swap error\n");
	uprintf("sorry, pid %d was %s", p->p_pid, mesg);
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			goto loop;
		}
	}
	splx(s);
}

/*
 * Raw I/O.  The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked.  After the completion of the I/O, the above pages
 * are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
	int (*strat)();
	register struct buf *bp;
	dev_t dev;
	int rw;
	unsigned (*mincnt)();
{
	register int c;
	char *a;
	int s;

	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	s = spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	splx(s);
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		splx(s);
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
		if (bp->b_flags&B_ERROR)
			break;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}

/*ARGSUSED*/
unsigned
minphys(bp)
	struct buf *bp;
{

	if (bp->b_bcount > 63 * 1024)
		bp->b_bcount = 63 * 1024;
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error)==0)
			u.u_error = EIO;
}
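/*
 * Illustrative sketch (not part of the original source): a character
 * device "raw" read entry point built on physio.  xxread, xxstrategy,
 * and rxxbuf are hypothetical; minphys above is the usual count
 * limiter.
 */
#ifdef notdef
struct	buf rxxbuf;		/* private header owned by the driver */

xxread(dev)
	dev_t dev;
{
	int xxstrategy();
	unsigned minphys();

	physio(xxstrategy, &rxxbuf, dev, B_READ, minphys);
}
#endif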
/*
 * Invalidate in core blocks belonging to closed or unmounted filesystem
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty,
 * then properly flush the queues.  Until that happy day, this suffices
 * for correctness. ... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp ((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}
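/*
 * Illustrative sketch (not part of the original source): the unmount
 * path combines the two routines above - first push out delayed writes
 * for the device, then invalidate what remains in the cache.
 */
#ifdef notdef
exampleumountflush(dev)
	dev_t dev;
{

	bflush(dev);		/* write back all B_DELWRI blocks for dev */
	binval(dev);		/* then mark its cached blocks B_INVAL */
}
#endif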