xref: /original-bsd/sys/kern/vfs_bio.c (revision 42ef0c70)
1 /*	vfs_bio.c	4.25	82/01/17	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/dir.h"
6 #include "../h/user.h"
7 #include "../h/buf.h"
8 #include "../h/conf.h"
9 #include "../h/proc.h"
10 #include "../h/seg.h"
11 #include "../h/pte.h"
12 #include "../h/vm.h"
13 #include "../h/trace.h"
14 
15 /*
16  * The following several routines allocate and free
17  * buffers with various side effects.  In general the
18  * arguments to an allocate routine are a device and
19  * a block number, and the value is a pointer to
20  * the buffer header; the buffer is marked "busy"
21  * so that no one else can touch it.  If the block was
22  * already in core, no I/O need be done; if it is
23  * already busy, the process waits until it becomes free.
24  * The following routines allocate a buffer:
25  *	getblk
26  *	bread
27  *	breada
28  *	baddr	(if it is incore)
29  * Eventually the buffer must be released, possibly with the
30  * side effect of writing it out, by using one of
31  *	bwrite
32  *	bdwrite
33  *	bawrite
34  *	brelse
35  */
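/*
 * Illustrative sketch, not part of the original source: the usual life
 * cycle of a cache buffer under the protocol described above.  The
 * routine and its arguments are hypothetical.
 */
#ifdef notdef
bufexample(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;

	bp = bread(dev, blkno);		/* buffer comes back marked B_BUSY */
	if ((bp->b_flags & B_ERROR) == 0) {
		/* examine the BSIZE bytes at bp->b_un.b_addr here */
	}
	brelse(bp);			/* hand it back to the free lists */
}
#endif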
36 
37 struct	buf bfreelist[BQUEUES];
38 struct	buf bswlist, *bclnlist;
39 
40 #define	BUFHSZ	63
41 struct	bufhd bufhash[BUFHSZ];
42 #define	BUFHASH(dev, dblkno)	\
43 		((struct buf *)&bufhash[((int)(dev)+(int)(dblkno)) % BUFHSZ])
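/*
 * For illustration only (the numbers are made up): with BUFHSZ = 63,
 * a buffer for dev = 1025 and dblkno = 100 hashes to bucket
 * (1025 + 100) % 63 = 54, so its header is chained from bufhash[54].
 */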
44 
45 /*
46  * Initialize hash links for buffers.
47  */
48 bhinit()
49 {
50 	register int i;
51 	register struct bufhd *bp;
52 
53 	for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)
54 		bp->b_forw = bp->b_back = (struct buf *)bp;
55 }
56 
57 /* #define	DISKMON	1 */
58 
59 #ifdef	DISKMON
60 struct {
61 	int	nbuf;
62 	long	nread;
63 	long	nreada;
64 	long	ncache;
65 	long	nwrite;
66 	long	bufcount[64];
67 } io_info;
68 #endif
69 
70 /*
71  * Swap IO headers -
72  * They contain the necessary information for the swap I/O.
73  * At any given time, a swap header is on one of three
74  * different lists.  When free it is on the free list;
75  * when allocated and the I/O queued, it is on the swap
76  * device list; and finally, if the operation was a dirty
77  * page push, when the I/O completes it is inserted
78  * into a list of cleaned pages to be processed by the pageout daemon.
79  */
80 struct	buf *swbuf;
81 short	*swsize;		/* CAN WE JUST USE B_BCOUNT? */
82 int	*swpf;
83 
84 
85 #ifndef	UNFAST
86 #define	notavail(bp) \
87 { \
88 	int s = spl6(); \
89 	(bp)->av_back->av_forw = (bp)->av_forw; \
90 	(bp)->av_forw->av_back = (bp)->av_back; \
91 	(bp)->b_flags |= B_BUSY; \
92 	splx(s); \
93 }
94 #endif
95 
96 /*
97  * Read in (if necessary) the block and return a buffer pointer.
98  */
99 struct buf *
100 bread(dev, blkno)
101 dev_t dev;
102 daddr_t blkno;
103 {
104 	register struct buf *bp;
105 
106 	bp = getblk(dev, blkno);
107 	if (bp->b_flags&B_DONE) {
108 #ifdef	TRACE
109 		trace(TR_BREADHIT, dev, blkno);
110 #endif
111 #ifdef	DISKMON
112 		io_info.ncache++;
113 #endif
114 		return(bp);
115 	}
116 	bp->b_flags |= B_READ;
117 	bp->b_bcount = BSIZE;
118 	(*bdevsw[major(dev)].d_strategy)(bp);
119 #ifdef	TRACE
120 	trace(TR_BREADMISS, dev, blkno);
121 #endif
122 #ifdef	DISKMON
123 	io_info.nread++;
124 #endif
125 	u.u_vm.vm_inblk++;		/* pay for read */
126 	iowait(bp);
127 	return(bp);
128 }
129 
130 /*
131  * Read in the block, like bread, but also start I/O on the
132  * read-ahead block (which is not allocated to the caller)
133  */
134 struct buf *
135 breada(dev, blkno, rablkno)
136 dev_t dev;
137 daddr_t blkno, rablkno;
138 {
139 	register struct buf *bp, *rabp;
140 
141 	bp = NULL;
142 	if (!incore(dev, blkno)) {
143 		bp = getblk(dev, blkno);
144 		if ((bp->b_flags&B_DONE) == 0) {
145 			bp->b_flags |= B_READ;
146 			bp->b_bcount = BSIZE;
147 			(*bdevsw[major(dev)].d_strategy)(bp);
148 #ifdef	TRACE
149 			trace(TR_BREADMISS, dev, blkno);
150 #endif
151 #ifdef	DISKMON
152 			io_info.nread++;
153 #endif
154 			u.u_vm.vm_inblk++;		/* pay for read */
155 		}
156 #ifdef	TRACE
157 		else
158 			trace(TR_BREADHIT, dev, blkno);
159 #endif
160 	}
161 	if (rablkno && !incore(dev, rablkno)) {
162 		rabp = getblk(dev, rablkno);
163 		if (rabp->b_flags & B_DONE) {
164 			brelse(rabp);
165 #ifdef	TRACE
166 			trace(TR_BREADHITRA, dev, blkno);
167 #endif
168 		} else {
169 			rabp->b_flags |= B_READ|B_ASYNC;
170 			rabp->b_bcount = BSIZE;
171 			(*bdevsw[major(dev)].d_strategy)(rabp);
172 #ifdef	TRACE
173 			trace(TR_BREADMISSRA, dev, rablkno);
174 #endif
175 #ifdef	DISKMON
176 			io_info.nreada++;
177 #endif
178 			u.u_vm.vm_inblk++;		/* pay in advance */
179 		}
180 	}
181 	if(bp == NULL)
182 		return(bread(dev, blkno));
183 	iowait(bp);
184 	return(bp);
185 }
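/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * sequential reader asks breada for block lbn and schedules block lbn+1,
 * so the next iteration usually finds its data already in core.
 */
#ifdef notdef
	for (lbn = 0; lbn < nblocks; lbn++) {
		bp = breada(dev, lbn, lbn + 1);
		/* consume the BSIZE bytes at bp->b_un.b_addr */
		brelse(bp);
	}
#endif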
186 
187 /*
188  * Write the buffer, waiting for completion.
189  * Then release the buffer.
190  */
191 bwrite(bp)
192 register struct buf *bp;
193 {
194 	register flag;
195 
196 	flag = bp->b_flags;
197 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
198 	bp->b_bcount = BSIZE;
199 #ifdef	DISKMON
200 	io_info.nwrite++;
201 #endif
202 	if ((flag&B_DELWRI) == 0)
203 		u.u_vm.vm_oublk++;		/* no one paid yet */
204 #ifdef	TRACE
205 	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
206 #endif
207 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
208 	if ((flag&B_ASYNC) == 0) {
209 		iowait(bp);
210 		brelse(bp);
211 	} else if (flag & B_DELWRI)
212 		bp->b_flags |= B_AGE;
213 	else
214 		geterror(bp);
215 }
216 
217 /*
218  * Release the buffer, marking it so that if it is grabbed
219  * for another purpose it will be written out before being
220  * given up (e.g. when writing a partial block where it is
221  * assumed that another write for the same block will soon follow).
222  * This can't be done for magtape, since writes must be done
223  * in the same order as requested.
224  */
225 bdwrite(bp)
226 register struct buf *bp;
227 {
228 	register int flags;
229 
230 	if ((bp->b_flags&B_DELWRI) == 0)
231 		u.u_vm.vm_oublk++;		/* no one paid yet */
232 	flags = bdevsw[major(bp->b_dev)].d_flags;
233 	if(flags & B_TAPE)
234 		bawrite(bp);
235 	else {
236 		bp->b_flags |= B_DELWRI | B_DONE;
237 		brelse(bp);
238 	}
239 }
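/*
 * Illustrative sketch, not part of the original source: the partial-block
 * update that bdwrite is intended for.  A hypothetical caller (data, off
 * and cnt are made up) reads the block, patches part of it, and lets the
 * write happen later.
 */
#ifdef notdef
	bp = bread(dev, blkno);
	bcopy(data, bp->b_un.b_addr + off, cnt);	/* modify part of the block */
	bdwrite(bp);					/* write-behind */
#endif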
240 
241 /*
242  * Release the buffer, start I/O on it, but don't wait for completion.
243  */
244 bawrite(bp)
245 register struct buf *bp;
246 {
247 
248 	bp->b_flags |= B_ASYNC;
249 	bwrite(bp);
250 }
251 
252 /*
253  * Release the buffer, with no I/O implied.
254  */
255 brelse(bp)
256 register struct buf *bp;
257 {
258 	register struct buf *flist;
259 	register s;
260 
261 	if (bp->b_flags&B_WANTED)
262 		wakeup((caddr_t)bp);
263 	if (bfreelist[0].b_flags&B_WANTED) {
264 		bfreelist[0].b_flags &= ~B_WANTED;
265 		wakeup((caddr_t)bfreelist);
266 	}
267 	if (bp->b_flags&B_ERROR)
268 		if (bp->b_flags & B_LOCKED)
269 			bp->b_flags &= ~B_ERROR;	/* try again later */
270 		else
271 			bp->b_dev = NODEV;  		/* no assoc */
272 	s = spl6();
273 	if (bp->b_flags & (B_ERROR|B_INVAL)) {
274 		/* block has no info ... put at front of most free list */
275 		flist = &bfreelist[BQUEUES-1];
276 		flist->av_forw->av_back = bp;
277 		bp->av_forw = flist->av_forw;
278 		flist->av_forw = bp;
279 		bp->av_back = flist;
280 	} else {
281 		if (bp->b_flags & B_LOCKED)
282 			flist = &bfreelist[BQ_LOCKED];
283 		else if (bp->b_flags & B_AGE)
284 			flist = &bfreelist[BQ_AGE];
285 		else
286 			flist = &bfreelist[BQ_LRU];
287 		flist->av_back->av_forw = bp;
288 		bp->av_back = flist->av_back;
289 		flist->av_back = bp;
290 		bp->av_forw = flist;
291 	}
292 	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
293 	splx(s);
294 }
295 
296 /*
297  * See if the block is associated with some buffer
298  * (mainly to avoid getting hung up on a wait in breada)
299  */
300 incore(dev, blkno)
301 dev_t dev;
302 daddr_t blkno;
303 {
304 	register struct buf *bp;
305 	register struct buf *dp;
306 	register int dblkno = fsbtodb(blkno);
307 
308 	dp = BUFHASH(dev, dblkno);
309 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
310 		if (bp->b_blkno == dblkno && bp->b_dev == dev &&
311 		    !(bp->b_flags & B_INVAL))
312 			return (1);
313 	return (0);
314 }
315 
316 struct buf *
317 baddr(dev, blkno)
318 dev_t dev;
319 daddr_t blkno;
320 {
321 
322 	if (incore(dev, blkno))
323 		return (bread(dev, blkno));
324 	return (0);
325 }
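/*
 * Illustrative sketch, not part of the original source: baddr used as a
 * cache-only probe; the hypothetical caller does without the data when
 * the block is not already in core.
 */
#ifdef notdef
	if ((bp = baddr(dev, blkno)) != NULL) {
		/* the block was cached; use it */
		brelse(bp);
	}
#endif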
326 
327 /*
328  * Assign a buffer for the given block.  If the appropriate
329  * block is already associated, return it; otherwise search
330  * for the oldest non-busy buffer and reassign it.
331  *
332  * We use splx here because this routine may be called
333  * on the interrupt stack during a dump, and we don't
334  * want to lower the ipl back to 0.
335  */
336 struct buf *
337 getblk(dev, blkno)
338 dev_t dev;
339 daddr_t blkno;
340 {
341 	register struct buf *bp, *dp, *ep;
342 	register int dblkno = fsbtodb(blkno);
343 #ifdef	DISKMON
344 	register int i;
345 #endif
346 	int s;
347 
348 	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
349 		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
350 	dblkno = fsbtodb(blkno);
351 	dp = BUFHASH(dev, dblkno);
352     loop:
353 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
354 		if (bp->b_blkno != dblkno || bp->b_dev != dev ||
355 		    bp->b_flags&B_INVAL)
356 			continue;
357 		s = spl6();
358 		if (bp->b_flags&B_BUSY) {
359 			bp->b_flags |= B_WANTED;
360 			sleep((caddr_t)bp, PRIBIO+1);
361 			splx(s);
362 			goto loop;
363 		}
364 		splx(s);
365 #ifdef	DISKMON
366 		i = 0;
367 		dp = bp->av_forw;
368 		while ((dp->b_flags & B_HEAD) == 0) {
369 			i++;
370 			dp = dp->av_forw;
371 		}
372 		if (i<64)
373 			io_info.bufcount[i]++;
374 #endif
375 		notavail(bp);
376 		bp->b_flags |= B_CACHE;
377 		return(bp);
378 	}
379 	if (major(dev) >= nblkdev)
380 		panic("blkdev");
381 	s = spl6();
382 	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
383 		if (ep->av_forw != ep)
384 			break;
385 	if (ep == bfreelist) {		/* no free blocks at all */
386 		ep->b_flags |= B_WANTED;
387 		sleep((caddr_t)ep, PRIBIO+1);
388 		splx(s);
389 		goto loop;
390 	}
391 	splx(s);
392 	bp = ep->av_forw;
393 	notavail(bp);
394 	if (bp->b_flags & B_DELWRI) {
395 		bp->b_flags |= B_ASYNC;
396 		bwrite(bp);
397 		goto loop;
398 	}
399 #ifdef TRACE
400 	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
401 #endif
402 	bp->b_flags = B_BUSY;
403 	bp->b_back->b_forw = bp->b_forw;
404 	bp->b_forw->b_back = bp->b_back;
405 	bp->b_forw = dp->b_forw;
406 	bp->b_back = dp;
407 	dp->b_forw->b_back = bp;
408 	dp->b_forw = bp;
409 	bp->b_dev = dev;
410 	bp->b_blkno = dblkno;
411 	return(bp);
412 }
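/*
 * Illustrative sketch, not part of the original source: when a block is
 * about to be completely overwritten there is no reason to read it first;
 * a hypothetical caller claims the buffer with getblk, fills it, and
 * starts the write.
 */
#ifdef notdef
	bp = getblk(dev, blkno);	/* may still hold stale contents */
	clrbuf(bp);			/* or fill in all BSIZE bytes */
	bawrite(bp);			/* start the write, don't wait */
#endif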
413 
414 /*
415  * Get an empty block,
416  * not assigned to any particular device.
417  */
418 struct buf *
419 geteblk()
420 {
421 	register struct buf *bp, *dp;
422 	int s;
423 
424 loop:
425 	s = spl6();
426 	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
427 		if (dp->av_forw != dp)
428 			break;
429 	if (dp == bfreelist) {		/* no free blocks */
430 		dp->b_flags |= B_WANTED;
431 		sleep((caddr_t)dp, PRIBIO+1);
432 		goto loop;
433 	}
434 	splx(s);
435 	bp = dp->av_forw;
436 	notavail(bp);
437 	if (bp->b_flags & B_DELWRI) {
438 		bp->b_flags |= B_ASYNC;
439 		bwrite(bp);
440 		goto loop;
441 	}
442 #ifdef TRACE
443 	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
444 #endif
445 	bp->b_flags = B_BUSY|B_INVAL;
446 	bp->b_back->b_forw = bp->b_forw;
447 	bp->b_forw->b_back = bp->b_back;
448 	bp->b_forw = dp->b_forw;
449 	bp->b_back = dp;
450 	dp->b_forw->b_back = bp;
451 	dp->b_forw = bp;
452 	bp->b_dev = (dev_t)NODEV;
453 	return(bp);
454 }
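/*
 * Illustrative sketch, not part of the original source: geteblk hands out
 * a buffer with no device association (B_INVAL), which makes it usable as
 * BSIZE bytes of temporary storage.
 */
#ifdef notdef
	bp = geteblk();
	/* use bp->b_un.b_addr as scratch space */
	brelse(bp);			/* B_INVAL sends it back for immediate reuse */
#endif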
455 
456 /*
457  * Wait for I/O completion on the buffer; return errors
458  * to the user.
459  */
460 iowait(bp)
461 register struct buf *bp;
462 {
463 	int s;
464 
465 	s = spl6();
466 	while ((bp->b_flags&B_DONE)==0)
467 		sleep((caddr_t)bp, PRIBIO);
468 	splx(s);
469 	geterror(bp);
470 }
471 
472 #ifdef UNFAST
473 /*
474  * Unlink a buffer from the available list and mark it busy.
475  * (internal interface)
476  */
477 notavail(bp)
478 register struct buf *bp;
479 {
480 	register s;
481 
482 	s = spl6();
483 	bp->av_back->av_forw = bp->av_forw;
484 	bp->av_forw->av_back = bp->av_back;
485 	bp->b_flags |= B_BUSY;
486 	splx(s);
487 }
488 #endif
489 
490 /*
491  * Mark I/O complete on a buffer. If the header
492  * indicates a dirty page push completion, the
493  * header is inserted into the ``cleaned'' list
494  * to be processed by the pageout daemon. Otherwise
495  * release it if I/O is asynchronous, and wake
496  * up anyone waiting for it.
497  */
498 iodone(bp)
499 register struct buf *bp;
500 {
501 	register int s;
502 
503 	if (bp->b_flags & B_DONE)
504 		panic("dup iodone");
505 	bp->b_flags |= B_DONE;
506 	if (bp->b_flags & B_DIRTY) {
507 		if (bp->b_flags & B_ERROR)
508 			panic("IO err in push");
509 		s = spl6();
510 		bp->av_forw = bclnlist;
511 		bp->b_bcount = swsize[bp - swbuf];
512 		bp->b_pfcent = swpf[bp - swbuf];
513 		cnt.v_pgout++;
514 		cnt.v_pgpgout += bp->b_bcount / NBPG;
515 		bclnlist = bp;
516 		if (bswlist.b_flags & B_WANTED)
517 			wakeup((caddr_t)&proc[2]);
518 		splx(s);
519 		return;
520 	}
521 	if (bp->b_flags&B_ASYNC)
522 		brelse(bp);
523 	else {
524 		bp->b_flags &= ~B_WANTED;
525 		wakeup((caddr_t)bp);
526 	}
527 }
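/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * block-device interrupt routine finishing a transfer.  It records the
 * residual count and any error (hard_error is a made-up condition), then
 * hands the buffer to iodone.
 */
#ifdef notdef
	bp->b_resid = 0;		/* everything was transferred */
	if (hard_error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	}
	iodone(bp);			/* wakes the waiter, or releases an async buffer */
#endif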
528 
529 /*
530  * Zero the core associated with a buffer.
531  */
532 clrbuf(bp)
533 struct buf *bp;
534 {
535 	register *p;
536 	register c;
537 
538 	p = bp->b_un.b_words;
539 	c = BSIZE/sizeof(int);
540 	do
541 		*p++ = 0;
542 	while (--c);
543 	bp->b_resid = 0;
544 }
545 
546 /*
547  * swap I/O -
548  *
549  * If the flag indicates a dirty page push initiated
550  * by the pageout daemon, we map the page into the i'th
551  * virtual page of process 2 (the daemon itself) where i is
552  * the index of the swap header that has been allocated.
553  * We simply initialize the header and queue the I/O but
554  * do not wait for completion. When the I/O completes,
555  * iodone() will link the header to a list of cleaned
556  * pages to be processed by the pageout daemon.
557  */
558 swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
559 	struct proc *p;
560 	swblk_t dblkno;
561 	caddr_t addr;
562 	int rdflg, flag, nbytes;
563 	dev_t dev;
564 	unsigned pfcent;
565 {
566 	register struct buf *bp;
567 	register int c;
568 	int p2dp;
569 	register struct pte *dpte, *vpte;
570 	int s;
571 
572 	s = spl6();
573 	while (bswlist.av_forw == NULL) {
574 		bswlist.b_flags |= B_WANTED;
575 		sleep((caddr_t)&bswlist, PSWP+1);
576 	}
577 	bp = bswlist.av_forw;
578 	bswlist.av_forw = bp->av_forw;
579 	splx(s);
580 
581 	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
582 	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
583 		if (rdflg == B_READ)
584 			sum.v_pswpin += btoc(nbytes);
585 		else
586 			sum.v_pswpout += btoc(nbytes);
587 	bp->b_proc = p;
588 	if (flag & B_DIRTY) {
589 		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
590 		dpte = dptopte(&proc[2], p2dp);
591 		vpte = vtopte(p, btop(addr));
592 		for (c = 0; c < nbytes; c += NBPG) {
593 			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
594 				panic("swap bad pte");
595 			*dpte++ = *vpte++;
596 		}
597 		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
598 	} else
599 		bp->b_un.b_addr = addr;
600 	while (nbytes > 0) {
601 		c = imin(ctob(120), nbytes);
602 		bp->b_bcount = c;
603 		bp->b_blkno = dblkno;
604 		bp->b_dev = dev;
605 		if (flag & B_DIRTY) {
606 			swpf[bp - swbuf] = pfcent;
607 			swsize[bp - swbuf] = nbytes;
608 		}
609 #ifdef TRACE
610 		trace(TR_SWAPIO, dev, bp->b_blkno);
611 #endif
612 		(*bdevsw[major(dev)].d_strategy)(bp);
613 		if (flag & B_DIRTY) {
614 			if (c < nbytes)
615 				panic("big push");
616 			return;
617 		}
618 		s = spl6();
619 		while((bp->b_flags&B_DONE)==0)
620 			sleep((caddr_t)bp, PSWP);
621 		splx(s);
622 		bp->b_un.b_addr += c;
623 		bp->b_flags &= ~B_DONE;
624 		if (bp->b_flags & B_ERROR) {
625 			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
626 				panic("hard IO err in swap");
627 			swkill(p, (char *)0);
628 		}
629 		nbytes -= c;
630 		dblkno += btoc(c);
631 	}
632 	s = spl6();
633 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
634 	bp->av_forw = bswlist.av_forw;
635 	bswlist.av_forw = bp;
636 	if (bswlist.b_flags & B_WANTED) {
637 		bswlist.b_flags &= ~B_WANTED;
638 		wakeup((caddr_t)&bswlist);
639 		wakeup((caddr_t)&proc[2]);
640 	}
641 	splx(s);
642 }
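/*
 * Illustrative sketch, not part of the original source, with made-up
 * arguments (vblk, vaddr): paging one cluster back in for process p from
 * the swap device.  A dirty-page push from the pageout daemon would
 * instead pass B_WRITE with B_DIRTY set in flag and a page-frame handle
 * in pfcent.
 */
#ifdef notdef
	swap(p, vblk, vaddr, ctob(CLSIZE), B_READ, B_PGIN, swapdev, 0);
#endif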
643 
644 /*
645  * If rout == 0 then killed on swap error, else
646  * rout is the name of the routine where we ran out of
647  * swap space.
648  */
649 swkill(p, rout)
650 	struct proc *p;
651 	char *rout;
652 {
653 	char *mesg;
654 
655 	printf("pid %d: ", p->p_pid);
656 	if (rout)
657 		printf(mesg = "killed due to no swap space\n");
658 	else
659 		printf(mesg = "killed on swap error\n");
660 	uprintf("sorry, pid %d was %s", p->p_pid, mesg);
661 	/*
662 	 * To be sure there is no looping (e.g. in vmsched trying to
663 	 * swap it out), mark the process locked in core (as though
664 	 * done by the user) after killing it so no one will try
665 	 * to swap it out.
666 	 */
667 	psignal(p, SIGKILL);
668 	p->p_flag |= SULOCK;
669 }
670 
671 /*
672  * make sure all write-behind blocks
673  * on dev (or NODEV for all)
674  * are flushed out.
675  * (from umount and update)
676  */
677 bflush(dev)
678 dev_t dev;
679 {
680 	register struct buf *bp;
681 	register struct buf *flist;
682 	int s;
683 
684 loop:
685 	s = spl6();
686 	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
687 	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
688 		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
689 			bp->b_flags |= B_ASYNC;
690 			notavail(bp);
691 			bwrite(bp);
692 			goto loop;
693 		}
694 	}
695 	splx(s);
696 }
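/*
 * Illustrative sketch, not part of the original source: the periodic
 * update path flushes every delayed write by passing NODEV, while umount
 * flushes just the filesystem being unmounted.
 */
#ifdef notdef
	bflush(NODEV);			/* all devices */
	bflush(dev);			/* a single device */
#endif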
697 
698 /*
699  * Raw I/O. The arguments are
700  *	The strategy routine for the device
701  *	A buffer, which will always be a special buffer
702  *	  header owned exclusively by the device for this purpose
703  *	The device number
704  *	Read/write flag
705  * Essentially all the work is computing physical addresses and
706  * validating them.
707  * If the user has the proper access privileges, the process is
708  * marked 'delayed unlock' and the pages involved in the I/O are
709  * faulted and locked. After the completion of the I/O, the above pages
710  * are unlocked.
711  */
712 physio(strat, bp, dev, rw, mincnt)
713 int (*strat)();
714 register struct buf *bp;
715 unsigned (*mincnt)();
dev_t dev;
int rw;
716 {
717 	register int c;
718 	char *a;
719 	int s;
720 
721 	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
722 		u.u_error = EFAULT;
723 		return;
724 	}
725 	s = spl6();
726 	while (bp->b_flags&B_BUSY) {
727 		bp->b_flags |= B_WANTED;
728 		sleep((caddr_t)bp, PRIBIO+1);
729 	}
730 	bp->b_error = 0;
731 	bp->b_proc = u.u_procp;
732 	bp->b_un.b_addr = u.u_base;
733 	while (u.u_count != 0) {
734 		bp->b_flags = B_BUSY | B_PHYS | rw;
735 		bp->b_dev = dev;
736 		bp->b_blkno = u.u_offset >> PGSHIFT;
737 		bp->b_bcount = u.u_count;
738 		(*mincnt)(bp);
739 		c = bp->b_bcount;
740 		u.u_procp->p_flag |= SPHYSIO;
741 		vslock(a = bp->b_un.b_addr, c);
742 		(*strat)(bp);
743 		(void) spl6();
744 		while ((bp->b_flags&B_DONE) == 0)
745 			sleep((caddr_t)bp, PRIBIO);
746 		vsunlock(a, c, rw);
747 		u.u_procp->p_flag &= ~SPHYSIO;
748 		if (bp->b_flags&B_WANTED)
749 			wakeup((caddr_t)bp);
750 		splx(s);
751 		bp->b_un.b_addr += c;
752 		u.u_count -= c;
753 		u.u_offset += c;
754 		if (bp->b_flags&B_ERROR)
755 			break;
756 	}
757 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
758 	u.u_count = bp->b_resid;
759 	geterror(bp);
760 }
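/*
 * Illustrative sketch, not part of the original source: the conventional
 * raw-device read entry point.  The driver name, its strategy routine and
 * its private buffer header are hypothetical; minphys below is the usual
 * transfer-size clamp.
 */
#ifdef notdef
struct	buf rxxbuf;

rxxread(dev)
dev_t dev;
{

	physio(xxstrategy, &rxxbuf, dev, B_READ, minphys);
}
#endif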
761 
762 /*ARGSUSED*/
763 unsigned
764 minphys(bp)
765 struct buf *bp;
766 {
767 
768 	if (bp->b_bcount > 60 * 1024)
769 		bp->b_bcount = 60 * 1024;
770 }
771 
772 /*
773  * Pick up the device's error number and pass it to the user;
774  * if there is an error but the number is 0 set a generalized
775  * code.  Actually the latter is always true because devices
776  * don't yet return specific errors.
777  */
778 geterror(bp)
779 register struct buf *bp;
780 {
781 
782 	if (bp->b_flags&B_ERROR)
783 		if ((u.u_error = bp->b_error)==0)
784 			u.u_error = EIO;
785 }
786 
787 /*
788  * Invalidate in-core blocks belonging to a closed or unmounted filesystem.
789  *
790  * This is not nicely done at all - the buffer ought to be removed from the
791  * hash chains & have its dev/blkno fields clobbered, but unfortunately we
792  * can't do that here, as it is quite possible that the block is still
793  * being used for i/o. Eventually, all disc drivers should be forced to
794  * have a close routine, which ought to ensure that the queue is empty, then
795  * properly flush the queues. Until that happy day, this suffices for
796  * correctness.						... kre
797  */
798 binval(dev)
799 dev_t dev;
800 {
801 	register struct buf *bp;
802 	register struct bufhd *hp;
803 #define dp ((struct buf *)hp)
804 
805 	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
806 		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
807 			if (bp->b_dev == dev)
808 				bp->b_flags |= B_INVAL;
809 }
810