xref: /original-bsd/sys/kern/kern_physio.c (revision 9a96b58b)
1 /*	kern_physio.c	4.29	82/04/19	*/
2 
3 /* merged into kernel:	 @(#)bio.c 2.3 4/8/82 */
4 
5 #include "../h/param.h"
6 #include "../h/systm.h"
7 #include "../h/dir.h"
8 #include "../h/user.h"
9 #include "../h/buf.h"
10 #include "../h/conf.h"
11 #include "../h/proc.h"
12 #include "../h/seg.h"
13 #include "../h/pte.h"
14 #include "../h/vm.h"
15 #include "../h/trace.h"
16 
17 /*
18  * The following several routines allocate and free
19  * buffers with various side effects.  In general the
20  * arguments to an allocate routine are a device and
21  * a block number, and the value is a pointer to
22  * the buffer header; the buffer is marked "busy"
23  * so that no one else can touch it.  If the block was
24  * already in core, no I/O need be done; if it is
25  * already busy, the process waits until it becomes free.
26  * The following routines allocate a buffer:
27  *	getblk
28  *	bread
29  *	breada
30  *	baddr	(if it is incore)
31  * Eventually the buffer must be released, possibly with the
32  * side effect of writing it out, by using one of
33  *	bwrite
34  *	bdwrite
35  *	bawrite
36  *	brelse
37  */
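/*
 * An illustrative sketch of the usual calling pattern, kept under
 * #ifdef notdef so it is never compiled; the routine exuse and its
 * arguments are hypothetical and not part of this file.  A block is
 * obtained with bread, inspected or changed, and then given back with
 * brelse (if clean) or with bdwrite/bwrite (if modified).
 */
#ifdef notdef
struct	buf *bread();

exuse(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	bp = bread(dev, blkno, size);		/* comes back busy and filled */
	if (bp->b_flags & B_ERROR) {
		brelse(bp);			/* give it up; error is in u.u_error */
		return;
	}
	/* ... look at or modify bp->b_un.b_addr ... */
	bdwrite(bp);				/* delayed write; releases bp */
}
#endif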
38 
39 struct	buf bfreelist[BQUEUES];
40 struct	buf bswlist, *bclnlist;
41 
42 #define	BUFHSZ	63
43 #define RND	(MAXBSIZE/DEV_BSIZE)
44 struct	bufhd bufhash[BUFHSZ];
45 #define	BUFHASH(dev, dblkno)	\
46 	((struct buf *)&bufhash[((int)(dev)+(((int)(dblkno))/RND)) % BUFHSZ])
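/*
 * For instance, taking RND == 2, dev 0x0401 (1025) and dblkno 100 hash
 * to bucket (1025 + 100/2) % 63 == 4; blocks falling in the same MAXBSIZE
 * chunk of the same device thus share a single hash chain.
 */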
47 
48 /*
49  * Initialize hash links for buffers.
50  */
51 bhinit()
52 {
53 	register int i;
54 	register struct bufhd *bp;
55 
56 	for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)
57 		bp->b_forw = bp->b_back = (struct buf *)bp;
58 }
59 
60 /* #define	DISKMON	1 */
61 
62 #ifdef	DISKMON
63 struct {
64 	int	nbuf;
65 	long	nread;
66 	long	nreada;
67 	long	ncache;
68 	long	nwrite;
69 	long	bufcount[64];
70 } io_info;
71 #endif
72 
73 /*
74  * Swap IO headers -
75  * They contain the necessary information for the swap I/O.
76  * At any given time, a swap header can be in three
77  * different lists. When free it is in the free list,
78  * when allocated and the I/O queued, it is on the swap
79  * device list, and finally, if the operation was a dirty
80  * page push, when the I/O completes, it is inserted
81  * in a list of cleaned pages to be processed by the pageout daemon.
82  */
83 struct	buf *swbuf;
84 short	*swsize;		/* CAN WE JUST USE B_BCOUNT? */
85 int	*swpf;
86 
87 
88 #ifndef	UNFAST
89 #define	notavail(bp) \
90 { \
91 	int x = spl6(); \
92 	(bp)->av_back->av_forw = (bp)->av_forw; \
93 	(bp)->av_forw->av_back = (bp)->av_back; \
94 	(bp)->b_flags |= B_BUSY; \
95 	splx(x); \
96 }
97 #endif
98 
99 /*
100  * Read in (if necessary) the block and return a buffer pointer.
101  */
102 struct buf *
103 bread(dev, blkno, size)
104 	dev_t dev;
105 	daddr_t blkno;
106 	int size;
107 {
108 	register struct buf *bp;
109 
110 	bp = getblk(dev, blkno, size);
111 	if (bp->b_flags&B_DONE) {
112 #ifdef	TRACE
113 		trace(TR_BREADHIT, dev, blkno);
114 #endif
115 #ifdef	DISKMON
116 		io_info.ncache++;
117 #endif
118 		return(bp);
119 	}
120 	bp->b_flags |= B_READ;
121 	(*bdevsw[major(dev)].d_strategy)(bp);
122 #ifdef	TRACE
123 	trace(TR_BREADMISS, dev, blkno);
124 #endif
125 #ifdef	DISKMON
126 	io_info.nread++;
127 #endif
128 	u.u_vm.vm_inblk++;		/* pay for read */
129 	iowait(bp);
130 	return(bp);
131 }
132 
133 /*
134  * Read in the block, like bread, but also start I/O on the
135  * read-ahead block (which is not allocated to the caller)
136  */
137 struct buf *
138 breada(dev, blkno, rablkno, size)
139 	dev_t dev;
140 	daddr_t blkno, rablkno;
141 	int size;
142 {
143 	register struct buf *bp, *rabp;
144 
145 	bp = NULL;
146 	if (!incore(dev, blkno)) {
147 		bp = getblk(dev, blkno, size);
148 		if ((bp->b_flags&B_DONE) == 0) {
149 			bp->b_flags |= B_READ;
150 			(*bdevsw[major(dev)].d_strategy)(bp);
151 #ifdef	TRACE
152 			trace(TR_BREADMISS, dev, blkno);
153 #endif
154 #ifdef	DISKMON
155 			io_info.nread++;
156 #endif
157 			u.u_vm.vm_inblk++;		/* pay for read */
158 		}
159 #ifdef	TRACE
160 		else
161 			trace(TR_BREADHIT, dev, blkno);
162 #endif
163 	}
164 	if (rablkno && !incore(dev, rablkno)) {
165 		rabp = getblk(dev, rablkno, size);
166 		if (rabp->b_flags & B_DONE) {
167 			brelse(rabp);
168 #ifdef	TRACE
169 			trace(TR_BREADHITRA, dev, blkno);
170 #endif
171 		} else {
172 			rabp->b_flags |= B_READ|B_ASYNC;
173 			(*bdevsw[major(dev)].d_strategy)(rabp);
174 #ifdef	TRACE
175 			trace(TR_BREADMISSRA, dev, rablkno);
176 #endif
177 #ifdef	DISKMON
178 			io_info.nreada++;
179 #endif
180 			u.u_vm.vm_inblk++;		/* pay in advance */
181 		}
182 	}
183 	if(bp == NULL)
184 		return(bread(dev, blkno, size));
185 	iowait(bp);
186 	return(bp);
187 }
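/*
 * A sketch, under #ifdef notdef and with a hypothetical routine exscan,
 * of how a sequential reader typically drives breada: it asks for the
 * current block and names the next one as the read-ahead candidate, so
 * that I/O on the latter is started asynchronously.
 */
#ifdef notdef
exscan(dev, lbn, size)
	dev_t dev;
	daddr_t lbn;		/* current block of a sequential scan */
	int size;
{
	register struct buf *bp;

	bp = breada(dev, lbn, lbn + 1, size);	/* read-ahead on lbn + 1 */
	if ((bp->b_flags & B_ERROR) == 0) {
		/* ... copy data out of bp->b_un.b_addr ... */
	}
	brelse(bp);
}
#endif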
188 
189 /*
190  * Write the buffer, waiting for completion.
191  * Then release the buffer.
192  */
193 bwrite(bp)
194 register struct buf *bp;
195 {
196 	register flag;
197 
198 	flag = bp->b_flags;
199 	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
200 #ifdef	DISKMON
201 	io_info.nwrite++;
202 #endif
203 	if ((flag&B_DELWRI) == 0)
204 		u.u_vm.vm_oublk++;		/* no one paid yet */
205 #ifdef	TRACE
206 	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
207 #endif
208 	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
209 	if ((flag&B_ASYNC) == 0) {
210 		iowait(bp);
211 		brelse(bp);
212 	} else if (flag & B_DELWRI)
213 		bp->b_flags |= B_AGE;
214 	else
215 		geterror(bp);
216 }
217 
218 /*
219  * Release the buffer, marking it so that if it is grabbed
220  * for another purpose it will be written out before being
221  * given up (e.g. when writing a partial block where it is
222  * assumed that another write for the same block will soon follow).
223  * This can't be done for magtape, since writes must be done
224  * in the same order as requested.
225  */
226 bdwrite(bp)
227 register struct buf *bp;
228 {
229 	register int flags;
230 
231 	if ((bp->b_flags&B_DELWRI) == 0)
232 		u.u_vm.vm_oublk++;		/* no one paid yet */
233 	flags = bdevsw[major(bp->b_dev)].d_flags;
234 	if(flags & B_TAPE)
235 		bawrite(bp);
236 	else {
237 		bp->b_flags |= B_DELWRI | B_DONE;
238 		brelse(bp);
239 	}
240 }
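/*
 * Illustration of the delayed-write case described above, under
 * #ifdef notdef with a hypothetical routine expatch: part of a block is
 * rewritten and the buffer is released with bdwrite, so the physical
 * write is deferred until the buffer is reclaimed or flushed.
 */
#ifdef notdef
expatch(dev, blkno, off, cp, n)
	dev_t dev;
	daddr_t blkno;
	int off, n;
	char *cp;
{
	register struct buf *bp;

	bp = bread(dev, blkno, DEV_BSIZE);	/* need the old contents */
	bcopy(cp, bp->b_un.b_addr + off, n);
	bdwrite(bp);				/* marked B_DELWRI, written later */
}
#endif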
241 
242 /*
243  * Release the buffer, start I/O on it, but don't wait for completion.
244  */
245 bawrite(bp)
246 register struct buf *bp;
247 {
248 
249 	bp->b_flags |= B_ASYNC;
250 	bwrite(bp);
251 }
252 
253 /*
254  * release the buffer, with no I/O implied.
255  */
256 brelse(bp)
257 register struct buf *bp;
258 {
259 	register struct buf *flist;
260 	register s;
261 
262 	if (bp->b_flags&B_WANTED)
263 		wakeup((caddr_t)bp);
264 	if (bfreelist[0].b_flags&B_WANTED) {
265 		bfreelist[0].b_flags &= ~B_WANTED;
266 		wakeup((caddr_t)bfreelist);
267 	}
268 	if (bp->b_flags&B_ERROR)
269 		if (bp->b_flags & B_LOCKED)
270 			bp->b_flags &= ~B_ERROR;	/* try again later */
271 		else
272 			bp->b_dev = NODEV;  		/* no assoc */
273 	s = spl6();
274 	if (bp->b_flags & (B_ERROR|B_INVAL)) {
275 		/* block has no info ... put at front of most free list */
276 		flist = &bfreelist[BQUEUES-1];
277 		flist->av_forw->av_back = bp;
278 		bp->av_forw = flist->av_forw;
279 		flist->av_forw = bp;
280 		bp->av_back = flist;
281 	} else {
282 		if (bp->b_flags & B_LOCKED)
283 			flist = &bfreelist[BQ_LOCKED];
284 		else if (bp->b_flags & B_AGE)
285 			flist = &bfreelist[BQ_AGE];
286 		else
287 			flist = &bfreelist[BQ_LRU];
288 		flist->av_back->av_forw = bp;
289 		bp->av_back = flist->av_back;
290 		flist->av_back = bp;
291 		bp->av_forw = flist;
292 	}
293 	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
294 	splx(s);
295 }
296 
297 /*
298  * See if the block is associated with some buffer
299  * (mainly to avoid getting hung up on a wait in breada)
300  */
301 incore(dev, blkno)
302 dev_t dev;
303 daddr_t blkno;
304 {
305 	register struct buf *bp;
306 	register struct buf *dp;
307 
308 	dp = BUFHASH(dev, blkno);
309 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
310 		if (bp->b_blkno == blkno && bp->b_dev == dev &&
311 		    !(bp->b_flags & B_INVAL))
312 			return (1);
313 	return (0);
314 }
315 
316 struct buf *
317 baddr(dev, blkno, size)
318 	dev_t dev;
319 	daddr_t blkno;
320 	int size;
321 {
322 
323 	if (incore(dev, blkno))
324 		return (bread(dev, blkno, size));
325 	return (0);
326 }
327 
328 /*
329  * Assign a buffer for the given block.  If the appropriate
330  * block is already associated, return it; otherwise search
331  * for the oldest non-busy buffer and reassign it.
332  *
333  * We use splx here because this routine may be called
334  * on the interrupt stack during a dump, and we don't
335  * want to lower the ipl back to 0.
336  */
337 struct buf *
338 getblk(dev, blkno, size)
339 	dev_t dev;
340 	daddr_t blkno;
341 	int size;
342 {
343 	register struct buf *bp, *dp, *ep;
344 #ifdef	DISKMON
345 	register int i;
346 #endif
347 	int s;
348 
349 	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
350 		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
351 	dp = BUFHASH(dev, blkno);
352     loop:
353 	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
354 		if (bp->b_blkno != blkno || bp->b_dev != dev ||
355 		    bp->b_flags&B_INVAL)
356 			continue;
357 		s = spl6();
358 		if (bp->b_flags&B_BUSY) {
359 			bp->b_flags |= B_WANTED;
360 			sleep((caddr_t)bp, PRIBIO+1);
361 			splx(s);
362 			goto loop;
363 		}
364 		splx(s);
365 #ifdef	DISKMON
366 		i = 0;
367 		dp = bp->av_forw;
368 		while ((dp->b_flags & B_HEAD) == 0) {
369 			i++;
370 			dp = dp->av_forw;
371 		}
372 		if (i<64)
373 			io_info.bufcount[i]++;
374 #endif
375 		notavail(bp);
376 		brealloc(bp, size);
377 		bp->b_flags |= B_CACHE;
378 		return(bp);
379 	}
380 	if (major(dev) >= nblkdev)
381 		panic("blkdev");
382 	s = spl6();
383 	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
384 		if (ep->av_forw != ep)
385 			break;
386 	if (ep == bfreelist) {		/* no free blocks at all */
387 		ep->b_flags |= B_WANTED;
388 		sleep((caddr_t)ep, PRIBIO+1);
389 		splx(s);
390 		goto loop;
391 	}
392 	splx(s);
393 	bp = ep->av_forw;
394 	notavail(bp);
395 	if (bp->b_flags & B_DELWRI) {
396 		bp->b_flags |= B_ASYNC;
397 		bwrite(bp);
398 		goto loop;
399 	}
400 #ifdef TRACE
401 	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
402 #endif
403 	bp->b_flags = B_BUSY;
404 	bfree(bp);
405 	bp->b_back->b_forw = bp->b_forw;
406 	bp->b_forw->b_back = bp->b_back;
407 	bp->b_forw = dp->b_forw;
408 	bp->b_back = dp;
409 	dp->b_forw->b_back = bp;
410 	dp->b_forw = bp;
411 	bp->b_dev = dev;
412 	bp->b_blkno = blkno;
413 	brealloc(bp, size);
414 	return(bp);
415 }
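/*
 * Sketch, under #ifdef notdef with a hypothetical routine exfill: when a
 * whole block is about to be overwritten there is no reason to read it
 * first; getblk alone yields a busy buffer for (dev, blkno), which the
 * caller fills and then writes out.
 */
#ifdef notdef
exfill(dev, blkno, cp, size)
	dev_t dev;
	daddr_t blkno;
	char *cp;
	int size;
{
	register struct buf *bp;

	bp = getblk(dev, blkno, size);	/* no I/O; may find a cached copy */
	bcopy(cp, bp->b_un.b_addr, size);
	bwrite(bp);			/* synchronous write, then release */
}
#endif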
416 
417 /*
418  * get an empty block,
419  * not assigned to any particular device
420  */
421 struct buf *
422 geteblk(size)
423 	int size;
424 {
425 	register struct buf *bp, *dp;
426 	int s;
427 
428 loop:
429 	s = spl6();
430 	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
431 		if (dp->av_forw != dp)
432 			break;
433 	if (dp == bfreelist) {		/* no free blocks */
434 		dp->b_flags |= B_WANTED;
435 		sleep((caddr_t)dp, PRIBIO+1);
436 		goto loop;
437 	}
438 	splx(s);
439 	bp = dp->av_forw;
440 	notavail(bp);
441 	if (bp->b_flags & B_DELWRI) {
442 		bp->b_flags |= B_ASYNC;
443 		bwrite(bp);
444 		goto loop;
445 	}
446 #ifdef TRACE
447 	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
448 #endif
449 	bp->b_flags = B_BUSY|B_INVAL;
450 	bp->b_back->b_forw = bp->b_forw;
451 	bp->b_forw->b_back = bp->b_back;
452 	bp->b_forw = dp->b_forw;
453 	bp->b_back = dp;
454 	dp->b_forw->b_back = bp;
455 	dp->b_forw = bp;
456 	bp->b_dev = (dev_t)NODEV;
457 	bp->b_bcount = size;
458 	return(bp);
459 }
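/*
 * Sketch, under #ifdef notdef with a hypothetical routine exscratch:
 * geteblk supplies scratch space tied to no device block; it is returned
 * with brelse, and since the header carries B_INVAL the contents are not
 * kept in the cache.
 */
#ifdef notdef
exscratch()
{
	register struct buf *bp;

	bp = geteblk(DEV_BSIZE);
	clrbuf(bp);			/* zero the scratch area */
	/* ... build something in bp->b_un.b_addr ... */
	brelse(bp);
}
#endif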
460 
461 /*
462  * Allocate space associated with a buffer.
463  */
464 brealloc(bp, size)
465 	register struct buf *bp;
466 	int size;
467 {
468 	daddr_t start, last;
469 	register struct buf *ep;
470 	struct buf *dp;
471 	int s;
472 
473 	/*
474 	 * First we need to make sure that all overlapping previous I/O
475 	 * has been dealt with.
476 	 */
477 	if (size == bp->b_bcount)
478 		return;
479 	if (size < bp->b_bcount) {
480 		bp->b_bcount = size;
481 		return;
482 	}
483 	start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE);
484 	last = bp->b_blkno + (size / DEV_BSIZE) - 1;
485 	if (bp->b_bcount == 0) {
486 		start++;
487 		if (start == last)
488 			goto allocit;
489 	}
490 	dp = BUFHASH(bp->b_dev, bp->b_blkno);
491 loop:
492 	(void) spl0();
493 	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
494 		if (ep->b_blkno < start || ep->b_blkno > last ||
495 		    ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL)
496 			continue;
497 		s = spl6();
498 		if (ep->b_flags&B_BUSY) {
499 			ep->b_flags |= B_WANTED;
500 			sleep((caddr_t)ep, PRIBIO+1);
501 			splx(s);
502 			goto loop;
503 		}
504 		(void) spl0();
505 		/*
506 		 * What we would really like to do is kill this
507 		 * I/O since it is now useless. We cannot do that
508 		 * so we force it to complete, so that it cannot
509 		 * over-write our useful data later.
510 		 */
511 		if (ep->b_flags & B_DELWRI) {
512 			notavail(ep);
513 			ep->b_flags |= B_ASYNC;
514 			bwrite(ep);
515 			goto loop;
516 		}
517 	}
518 allocit:
519 	/*
520 	 * Here the buffer is already available, so all we
521 	 * need to do is set the size. Someday a better memory
522 	 * management scheme will be implemented.
523 	 */
524 	bp->b_bcount = size;
525 }
526 
527 /*
528  * Release space associated with a buffer.
529  */
530 bfree(bp)
531 	struct buf *bp;
532 {
533 	/*
534 	 * Here the buffer does not change, so all we
535 	 * need to do is set the size. Someday a better memory
536 	 * management scheme will be implemented.
537 	 */
538 	bp->b_bcount = 0;
539 }
540 
541 /*
542  * Wait for I/O completion on the buffer; return errors
543  * to the user.
544  */
545 iowait(bp)
546 	register struct buf *bp;
547 {
548 	int s;
549 
550 	s = spl6();
551 	while ((bp->b_flags&B_DONE)==0)
552 		sleep((caddr_t)bp, PRIBIO);
553 	splx(s);
554 	geterror(bp);
555 }
556 
557 #ifdef UNFAST
558 /*
559  * Unlink a buffer from the available list and mark it busy.
560  * (internal interface)
561  */
562 notavail(bp)
563 register struct buf *bp;
564 {
565 	register s;
566 
567 	s = spl6();
568 	bp->av_back->av_forw = bp->av_forw;
569 	bp->av_forw->av_back = bp->av_back;
570 	bp->b_flags |= B_BUSY;
571 	splx(s);
572 }
573 #endif
574 
575 /*
576  * Mark I/O complete on a buffer. If the header
577  * indicates a dirty page push completion, the
578  * header is inserted into the ``cleaned'' list
579  * to be processed by the pageout daemon. Otherwise
580  * release it if I/O is asynchronous, and wake
581  * up anyone waiting for it.
582  */
583 iodone(bp)
584 register struct buf *bp;
585 {
586 	register int s;
587 
588 	if (bp->b_flags & B_DONE)
589 		panic("dup iodone");
590 	bp->b_flags |= B_DONE;
591 	if (bp->b_flags & B_DIRTY) {
592 		if (bp->b_flags & B_ERROR)
593 			panic("IO err in push");
594 		s = spl6();
595 		bp->av_forw = bclnlist;
596 		bp->b_bcount = swsize[bp - swbuf];
597 		bp->b_pfcent = swpf[bp - swbuf];
598 		cnt.v_pgout++;
599 		cnt.v_pgpgout += bp->b_bcount / NBPG;
600 		bclnlist = bp;
601 		if (bswlist.b_flags & B_WANTED)
602 			wakeup((caddr_t)&proc[2]);
603 		splx(s);
604 		return;
605 	}
606 	if (bp->b_flags&B_ASYNC)
607 		brelse(bp);
608 	else {
609 		bp->b_flags &= ~B_WANTED;
610 		wakeup((caddr_t)bp);
611 	}
612 }
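/*
 * Sketch, under #ifdef notdef, of the driver side of this contract:
 * when a transfer finishes, the (hypothetical) interrupt code marks any
 * error and residual count on the buffer header and hands it to iodone,
 * which releases asynchronous buffers or wakes the sleeper in iowait.
 */
#ifdef notdef
exintr(bp, hard)
	register struct buf *bp;	/* the transfer just completed */
	int hard;			/* nonzero on an unrecovered error */
{

	if (hard) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	}
	bp->b_resid = 0;		/* or the count not transferred */
	iodone(bp);
}
#endif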
613 
614 /*
615  * Zero the core associated with a buffer.
616  */
617 clrbuf(bp)
618 	struct buf *bp;
619 {
620 	register int *p;
621 	register int c;
622 
623 	p = bp->b_un.b_words;
624 	c = bp->b_bcount/sizeof(int);
625 	do
626 		*p++ = 0;
627 	while (--c);
628 	bp->b_resid = 0;
629 }
630 
631 /*
632  * swap I/O -
633  *
634  * If the flag indicates a dirty page push initiated
635  * by the pageout daemon, we map the page into the i'th
636  * virtual page of process 2 (the daemon itself) where i is
637  * the index of the swap header that has been allocated.
638  * We simply initialize the header and queue the I/O but
639  * do not wait for completion. When the I/O completes,
640  * iodone() will link the header to a list of cleaned
641  * pages to be processed by the pageout daemon.
642  */
643 swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
644 	struct proc *p;
645 	swblk_t dblkno;
646 	caddr_t addr;
647 	int flag, nbytes;
648 	dev_t dev;
649 	unsigned pfcent;
650 {
651 	register struct buf *bp;
652 	register int c;
653 	int p2dp;
654 	register struct pte *dpte, *vpte;
655 	int s;
656 
657 	s = spl6();
658 	while (bswlist.av_forw == NULL) {
659 		bswlist.b_flags |= B_WANTED;
660 		sleep((caddr_t)&bswlist, PSWP+1);
661 	}
662 	bp = bswlist.av_forw;
663 	bswlist.av_forw = bp->av_forw;
664 	splx(s);
665 
666 	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
667 	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
668 		if (rdflg == B_READ)
669 			sum.v_pswpin += btoc(nbytes);
670 		else
671 			sum.v_pswpout += btoc(nbytes);
672 	bp->b_proc = p;
673 	if (flag & B_DIRTY) {
674 		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
675 		dpte = dptopte(&proc[2], p2dp);
676 		vpte = vtopte(p, btop(addr));
677 		for (c = 0; c < nbytes; c += NBPG) {
678 			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
679 				panic("swap bad pte");
680 			*dpte++ = *vpte++;
681 		}
682 		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
683 	} else
684 		bp->b_un.b_addr = addr;
685 	while (nbytes > 0) {
686 		c = imin(ctob(120), nbytes);
687 		bp->b_bcount = c;
688 		bp->b_blkno = dblkno;
689 		bp->b_dev = dev;
690 		if (flag & B_DIRTY) {
691 			swpf[bp - swbuf] = pfcent;
692 			swsize[bp - swbuf] = nbytes;
693 		}
694 #ifdef TRACE
695 		trace(TR_SWAPIO, dev, bp->b_blkno);
696 #endif
697 		(*bdevsw[major(dev)].d_strategy)(bp);
698 		if (flag & B_DIRTY) {
699 			if (c < nbytes)
700 				panic("big push");
701 			return;
702 		}
703 		s = spl6();
704 		while((bp->b_flags&B_DONE)==0)
705 			sleep((caddr_t)bp, PSWP);
706 		splx(s);
707 		bp->b_un.b_addr += c;
708 		bp->b_flags &= ~B_DONE;
709 		if (bp->b_flags & B_ERROR) {
710 			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
711 				panic("hard IO err in swap");
712 			swkill(p, (char *)0);
713 		}
714 		nbytes -= c;
715 		dblkno += btoc(c);
716 	}
717 	s = spl6();
718 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
719 	bp->av_forw = bswlist.av_forw;
720 	bswlist.av_forw = bp;
721 	if (bswlist.b_flags & B_WANTED) {
722 		bswlist.b_flags &= ~B_WANTED;
723 		wakeup((caddr_t)&bswlist);
724 		wakeup((caddr_t)&proc[2]);
725 	}
726 	splx(s);
727 }
728 
729 /*
730  * If rout == 0 then killed on swap error, else
731  * rout is the name of the routine where we ran out of
732  * swap space.
733  */
734 swkill(p, rout)
735 	struct proc *p;
736 	char *rout;
737 {
738 	char *mesg;
739 
740 	printf("pid %d: ", p->p_pid);
741 	if (rout)
742 		printf(mesg = "killed due to no swap space\n");
743 	else
744 		printf(mesg = "killed on swap error\n");
745 	uprintf("sorry, pid %d was %s", p->p_pid, mesg);
746 	/*
747 	 * To be sure no looping (e.g. in vmsched trying to
748 	 * swap out) mark process locked in core (as though
749 	 * done by user) after killing it so no one will try
750 	 * to swap it out.
751 	 */
752 	psignal(p, SIGKILL);
753 	p->p_flag |= SULOCK;
754 }
755 
756 /*
757  * make sure all write-behind blocks
758  * on dev (or NODEV for all)
759  * are flushed out.
760  * (from umount and update)
761  * (and temporarily from pagein)
762  */
763 bflush(dev)
764 dev_t dev;
765 {
766 	register struct buf *bp;
767 	register struct buf *flist;
768 	int s;
769 
770 loop:
771 	s = spl6();
772 	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
773 	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
774 		if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) {
775 			bp->b_flags |= B_ASYNC;
776 			notavail(bp);
777 			bwrite(bp);
778 			goto loop;
779 		}
780 	}
781 	splx(s);
782 }
783 
784 /*
785  * Raw I/O. The arguments are
786  *	The strategy routine for the device
787  *	A buffer, which will always be a special buffer
788  *	  header owned exclusively by the device for this purpose
789  *	The device number
790  *	Read/write flag
791  * Essentially all the work is computing physical addresses and
792  * validating them.
793  * If the user has the proper access privileges, the process is
794  * marked 'delayed unlock' and the pages involved in the I/O are
795  * faulted and locked. After the completion of the I/O, the above pages
796  * are unlocked.
797  */
798 physio(strat, bp, dev, rw, mincnt)
799 int (*strat)();
800 register struct buf *bp;
801 unsigned (*mincnt)();
802 {
803 	register int c;
804 	char *a;
805 	int s;
806 
807 	if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) {
808 		u.u_error = EFAULT;
809 		return;
810 	}
811 	s = spl6();
812 	while (bp->b_flags&B_BUSY) {
813 		bp->b_flags |= B_WANTED;
814 		sleep((caddr_t)bp, PRIBIO+1);
815 	}
816 	splx(s);
817 	bp->b_error = 0;
818 	bp->b_proc = u.u_procp;
819 	bp->b_un.b_addr = u.u_base;
820 	while (u.u_count != 0) {
821 		bp->b_flags = B_BUSY | B_PHYS | rw;
822 		bp->b_dev = dev;
823 		bp->b_blkno = u.u_offset >> PGSHIFT;
824 		bp->b_bcount = u.u_count;
825 		(*mincnt)(bp);
826 		c = bp->b_bcount;
827 		u.u_procp->p_flag |= SPHYSIO;
828 		vslock(a = bp->b_un.b_addr, c);
829 		(*strat)(bp);
830 		(void) spl6();
831 		while ((bp->b_flags&B_DONE) == 0)
832 			sleep((caddr_t)bp, PRIBIO);
833 		vsunlock(a, c, rw);
834 		u.u_procp->p_flag &= ~SPHYSIO;
835 		if (bp->b_flags&B_WANTED)
836 			wakeup((caddr_t)bp);
837 		splx(s);
838 		bp->b_un.b_addr += c;
839 		u.u_count -= c;
840 		u.u_offset += c;
841 		if (bp->b_flags&B_ERROR)
842 			break;
843 	}
844 	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
845 	u.u_count = bp->b_resid;
846 	geterror(bp);
847 }
848 
849 /*ARGSUSED*/
850 unsigned
851 minphys(bp)
852 struct buf *bp;
853 {
854 
855 	if (bp->b_bcount > 63 * 1024)
856 		bp->b_bcount = 63 * 1024;
857 }
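/*
 * Sketch, under #ifdef notdef, of how a raw (character) interface to a
 * block device typically uses physio: the hypothetical driver supplies
 * its strategy routine, a private buffer header reserved for raw I/O,
 * and minphys (or its own transfer-size limiter), and physio does the
 * address validation, locking and waiting described above.
 */
#ifdef notdef
struct	buf rexbuf;		/* private header for raw transfers */
int	exstrategy();		/* the device's strategy routine */

exread(dev)
	dev_t dev;
{

	physio(exstrategy, &rexbuf, dev, B_READ, minphys);
}

exwrite(dev)
	dev_t dev;
{

	physio(exstrategy, &rexbuf, dev, B_WRITE, minphys);
}
#endif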
858 
859 
860 /*
861  * Pick up the device's error number and pass it to the user;
862  * if there is an error but the number is 0 set a generalized
863  * code.  Actually the latter is always true because devices
864  * don't yet return specific errors.
865  */
866 geterror(bp)
867 register struct buf *bp;
868 {
869 
870 	if (bp->b_flags&B_ERROR)
871 		if ((u.u_error = bp->b_error)==0)
872 			u.u_error = EIO;
873 }
874 
875 /*
876  * Invalidate in-core blocks belonging to a closed or unmounted filesystem
877  *
878  * This is not nicely done at all - the buffer ought to be removed from the
879  * hash chains & have its dev/blkno fields clobbered, but unfortunately we
880  * can't do that here, as it is quite possible that the block is still
881  * being used for i/o. Eventually, all disc drivers should be forced to
882  * have a close routine, which ought to ensure that the queue is empty, then
883  * properly flush the queues. Until that happy day, this suffices for
884  * correctness.						... kre
885  */
886 binval(dev)
887 dev_t dev;
888 {
889 	register struct buf *bp;
890 	register struct bufhd *hp;
891 #define dp ((struct buf *)hp)
892 
893 	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
894 		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
895 			if (bp->b_dev == dev)
896 				bp->b_flags |= B_INVAL;
897 }
898