/*	$NetBSD: kttcp.c,v 1.6 2002/10/23 09:13:01 jdolecek Exp $	*/

/*
 * Copyright (c) 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden and Jason R. Thorpe for
 * Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * kttcp.c --
 *
 *	This module provides kernel support for testing network
 *	throughput from the perspective of the kernel.  It is
 *	similar in spirit to the classic ttcp network benchmark
 *	program, the main difference being that with kttcp, the
 *	kernel is the source and sink of the data.
 *
 *	Testing like this is useful for a few reasons:
 *
 *	1. This allows us to know what kind of performance we can
 *	   expect from network applications that run in the kernel
 *	   space, such as the NFS server or the NFS client.  These
 *	   applications don't have to move the data to/from userspace,
 *	   and so benchmark programs which run in userspace don't
 *	   give us an accurate model.
 *
 *	2. Since data received is just thrown away, the receiver
 *	   is very fast.  This can provide better exercise for the
 *	   sender at the other end.
 *
 *	3. Since the NetBSD kernel currently uses a run-to-completion
 *	   scheduling model, kttcp provides a benchmark model where
 *	   preemption of the benchmark program is not an issue.
 */
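
/*
 * Rough userland usage sketch.  The /dev/kttcp path is an assumption
 * (whatever node the device is attached as on a given system); the
 * ioctl names and kttcp_io_args fields come from <dev/kttcpio.h> as
 * used in this file:
 *
 *	struct kttcp_io_args kio;
 *	int kfd, s;
 *
 *	kfd = open("/dev/kttcp", O_RDWR);	-- FWRITE is required
 *	s = ...;				-- connected socket descriptor
 *	kio.kio_socket = s;
 *	kio.kio_totalsize = nbytes;		-- amount of data to move
 *	ioctl(kfd, KTTCP_IO_SEND, &kio);	-- or KTTCP_IO_RECV
 *	-- on success, kio.kio_elapsed and kio.kio_bytesdone report the
 *	-- transfer time and byte count.
 */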

#include <sys/param.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signal.h>
#include <sys/socketvar.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <dev/kttcpio.h>

static int kttcp_send(struct proc *p, struct kttcp_io_args *);
static int kttcp_recv(struct proc *p, struct kttcp_io_args *);
static int kttcp_sosend(struct socket *, unsigned long long,
			unsigned long long *, struct proc *, int);
static int kttcp_soreceive(struct socket *, unsigned long long,
			   unsigned long long *, struct proc *, int *);

void	kttcpattach(int);

dev_type_ioctl(kttcpioctl);

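/*
 * Character device switch entry: kttcp is driven entirely through
 * ioctl(); open and close are no-ops and read/write/poll/mmap are
 * not supported.
 */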
const struct cdevsw kttcp_cdevsw = {
	nullopen, nullclose, noread, nowrite, kttcpioctl,
	nostop, notty, nopoll, nommap, nokqfilter,
};

void
kttcpattach(int count)
{
	/* Do nothing. */
}

int
kttcpioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
{
	int error;

	if ((flag & FWRITE) == 0)
		return EPERM;

	switch (cmd) {
	case KTTCP_IO_SEND:
		error = kttcp_send(p, (struct kttcp_io_args *) data);
		break;

	case KTTCP_IO_RECV:
		error = kttcp_recv(p, (struct kttcp_io_args *) data);
		break;

	default:
		return EINVAL;
	}

	return error;
}

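/*
 * KTTCP_IO_SEND handler: push kio_totalsize bytes of kernel-generated
 * data out of the socket named by kio_socket, looping over
 * kttcp_sosend() until everything is sent or an error occurs, and
 * return the elapsed time and byte count to the caller.
 */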
static int
kttcp_send(struct proc *p, struct kttcp_io_args *kio)
{
	struct file *fp;
	int error;
	struct timeval t0, t1;
	unsigned long long len, done;

	if (kio->kio_totalsize >= KTTCP_MAX_XMIT)
		return EINVAL;

	fp = fd_getfile(p->p_fd, kio->kio_socket);
	if (fp == NULL)
		return EBADF;
	if (fp->f_type != DTYPE_SOCKET)
		return EFTYPE;

	len = kio->kio_totalsize;
	microtime(&t0);
	do {
		error = kttcp_sosend((struct socket *)fp->f_data, len,
		    &done, p, 0);
		len -= done;
	} while (error == 0 && len > 0);
	microtime(&t1);
	if (error != 0)
		return error;
	timersub(&t1, &t0, &kio->kio_elapsed);

	kio->kio_bytesdone = kio->kio_totalsize - len;

	return 0;
}

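/*
 * KTTCP_IO_RECV handler: read up to kio_totalsize bytes from the
 * socket named by kio_socket and discard them, looping over
 * kttcp_soreceive() until the requested amount has been received,
 * the peer closes the connection (done == 0 or EPIPE), or an error
 * occurs.
 */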
static int
kttcp_recv(struct proc *p, struct kttcp_io_args *kio)
{
	struct file *fp;
	int error;
	struct timeval t0, t1;
	unsigned long long len, done;

	if (kio->kio_totalsize > KTTCP_MAX_XMIT)
		return EINVAL;

	fp = fd_getfile(p->p_fd, kio->kio_socket);
	if (fp == NULL || fp->f_type != DTYPE_SOCKET)
		return EBADF;
	len = kio->kio_totalsize;
	microtime(&t0);
	do {
		error = kttcp_soreceive((struct socket *)fp->f_data,
		    len, &done, p, NULL);
		len -= done;
	} while (error == 0 && len > 0 && done > 0);
	microtime(&t1);
	if (error == EPIPE)
		error = 0;
	if (error != 0)
		return error;
	timersub(&t1, &t0, &kio->kio_elapsed);

	kio->kio_bytesdone = kio->kio_totalsize - len;

	return 0;
}

#define SBLOCKWAIT(f)   (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)

/*
 * Slightly changed version of sosend():  since there is no userland
 * buffer, no uiomove() is done; the mbufs are handed to the protocol
 * containing whatever happened to be in them, as only the amount of
 * data transferred matters here.
 */
static int
kttcp_sosend(struct socket *so, unsigned long long slen,
	     unsigned long long *done, struct proc *p, int flags)
{
	struct mbuf **mp, *m, *top;
	long space, len, mlen;
	int error, s, dontroute, atomic;
	long long resid;

	atomic = sosendallatonce(so);
	resid = slen;
	top = NULL;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

 restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0)
					snderr(ENOTCONN);
			} else
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat))
			snderr(EMSGSIZE);
		if (space < resid && (atomic || space < so->so_snd.sb_lowat)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			SBLASTRECORDCHK(&so->so_snd,
			    "kttcp_sosend sbwait 1");
			SBLASTMBUFCHK(&so->so_snd,
			    "kttcp_sosend sbwait 1");
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
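		/*
		 * Build an mbuf chain for this pass.  Unlike sosend(),
		 * there is no uiomove() here: the mbuf contents are left
		 * as-is, since only the number of bytes pushed through
		 * the socket matters for the benchmark.
		 */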
		mp = &top;
		do {
			do {
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
					len = lmin(MCLBYTES, resid);
#else
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
#endif
					space -= len;
				} else {
nopages:
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				resid -= len;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);

			s = splsoftnet();

			if (so->so_state & SS_CANTSENDMORE)
				snderr(EPIPE);

			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			if (resid > 0)
				so->so_state |= SS_MORETOCOME;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, NULL, NULL, p);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			if (resid > 0)
				so->so_state &= ~SS_MORETOCOME;
			splx(s);

			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

 release:
	sbunlock(&so->so_snd);
 out:
	if (top)
		m_freem(top);
	*done = slen - resid;
#if 0
	printf("sosend: error %d slen %llu resid %lld\n", error, slen, resid);
#endif
	return (error);
}

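/*
 * Slightly changed version of soreceive():  the received data is never
 * copied out (there is no uio); it is simply counted and then discarded
 * or stepped over.
 */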
static int
kttcp_soreceive(struct socket *so, unsigned long long slen,
		unsigned long long *done, struct proc *p, int *flagsp)
{
	struct mbuf *m, **mp;
	int flags, len, error, s, offset, moff, type;
	long long orig_resid, resid;
	struct protosw	*pr;
	struct mbuf *nextrecord;

	pr = so->so_proto;
	mp = NULL;
	type = 0;
	resid = orig_resid = slen;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), (struct mbuf *)0,
		    (struct proc *)0);
		if (error)
			goto bad;
		do {
			resid -= min(resid, m->m_len);
			m = m_free(m);
		} while (resid && error == 0 && m);
 bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
		    (struct mbuf *)0, (struct mbuf *)0, (struct proc *)0);

 restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
 dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * While we process the initial mbufs containing address and control
	 * info, we save a copy of m->m_nextpkt into nextrecord.
	 */
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}

	/*
	 * If m is non-NULL, we have some data to read.  From now on,
	 * make sure to keep sb_lastrecord consistent when working on
	 * the last packet on the chain (nextrecord == NULL) and we
	 * change m->m_nextpkt.
	 */
	if (m) {
		if ((flags & MSG_PEEK) == 0) {
			m->m_nextpkt = nextrecord;
			/*
			 * If nextrecord == NULL (this is a single chain),
			 * then sb_lastrecord may not be valid here if m
			 * was changed earlier.
			 */
			if (nextrecord == NULL) {
				KASSERT(so->so_rcv.sb_mb == m);
				so->so_rcv.sb_lastrecord = m;
			}
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	} else {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(so->so_rcv.sb_mb == m);
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
		}
	}
	SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 2");

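	/*
	 * Main receive loop: walk the mbufs of the current record,
	 * accounting for the data and (unless MSG_PEEK is set) releasing
	 * it as we go.
	 */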
	moff = 0;
	offset = 0;
	while (m && resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise discard the data (there is no uio here)
		 * and free or advance past the mbufs.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv,
				    "kttcp_soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv,
				    "kttcp_soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until resid == 0 or an error
		 * termination occurs.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			/*
			 * If we are peeking and the socket receive buffer is
			 * full, stop since we can't get more data to peek at.
			 */
			if ((flags & MSG_PEEK) && sbspace(&so->so_rcv) <= 0)
				break;
			/*
			 * If we've drained the socket buffer, tell the
			 * protocol in case it needs to do something to
			 * get it filled again.
			 */
			if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb)
				(*pr->pr_usrreq)(so, PRU_RCVD,
				    (struct mbuf *)0,
				    (struct mbuf *)(long)flags,
				    (struct mbuf *)0,
				    (struct proc *)0);
			SBLASTRECORDCHK(&so->so_rcv,
			    "kttcp_soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv,
			    "kttcp_soreceive sbwait 2");
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/* Short count, no error; report progress. */
				*done = slen - resid;
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

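	/*
	 * For record-oriented (PR_ATOMIC) protocols, anything left over
	 * in the record at this point is dropped and MSG_TRUNC is noted.
	 */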
	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0) {
			/*
			 * First part is an SB_EMPTY_FIXUP().  Second part
			 * makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "kttcp_soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "kttcp_soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
			    (struct mbuf *)(long)flags, (struct mbuf *)0,
			    (struct proc *)0);
	}
	if (orig_resid == resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
 release:
	sbunlock(&so->so_rcv);
	splx(s);
	*done = slen - resid;
#if 0
	printf("soreceive: error %d slen %llu resid %lld\n", error, slen, resid);
#endif
	return (error);
}