xref: /original-bsd/sys/kern/uipc_socket.c (revision 753853ba)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)uipc_socket.c	7.34 (Berkeley) 03/13/92
8  */
9 
10 #include "param.h"
11 #include "proc.h"
12 #include "file.h"
13 #include "malloc.h"
14 #include "mbuf.h"
15 #include "domain.h"
16 #include "kernel.h"
17 #include "protosw.h"
18 #include "socket.h"
19 #include "socketvar.h"
20 #include "resourcevar.h"
21 
22 /*
23  * Socket operation routines.
24  * These routines are called by the routines in
25  * sys_socket.c or from a system process, and
26  * implement the semantics of socket operations by
27  * switching out to the protocol specific routines.
28  */
29 /*ARGSUSED*/
30 socreate(dom, aso, type, proto)
31 	int dom;
32 	struct socket **aso;
33 	register int type;
34 	int proto;
35 {
36 	struct proc *p = curproc;		/* XXX */
37 	register struct protosw *prp;
38 	register struct socket *so;
39 	register int error;
40 
41 	if (proto)
42 		prp = pffindproto(dom, proto, type);
43 	else
44 		prp = pffindtype(dom, type);
45 	if (prp == 0)
46 		return (EPROTONOSUPPORT);
47 	if (prp->pr_type != type)
48 		return (EPROTOTYPE);
49 	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
50 	bzero((caddr_t)so, sizeof(*so));
51 	so->so_type = type;
52 	if (p->p_ucred->cr_uid == 0)
53 		so->so_state = SS_PRIV;
54 	so->so_proto = prp;
55 	error =
56 	    (*prp->pr_usrreq)(so, PRU_ATTACH,
57 		(struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
58 	if (error) {
59 		so->so_state |= SS_NOFDREF;
60 		sofree(so);
61 		return (error);
62 	}
63 	*aso = so;
64 	return (0);
65 }
66 
67 sobind(so, nam)
68 	struct socket *so;
69 	struct mbuf *nam;
70 {
71 	int s = splnet();
72 	int error;
73 
74 	error =
75 	    (*so->so_proto->pr_usrreq)(so, PRU_BIND,
76 		(struct mbuf *)0, nam, (struct mbuf *)0);
77 	splx(s);
78 	return (error);
79 }
80 
81 solisten(so, backlog)
82 	register struct socket *so;
83 	int backlog;
84 {
85 	int s = splnet(), error;
86 
87 	error =
88 	    (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
89 		(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
90 	if (error) {
91 		splx(s);
92 		return (error);
93 	}
94 	if (so->so_q == 0)
95 		so->so_options |= SO_ACCEPTCONN;
96 	if (backlog < 0)
97 		backlog = 0;
98 	so->so_qlimit = min(backlog, SOMAXCONN);
99 	splx(s);
100 	return (0);
101 }
102 
103 sofree(so)
104 	register struct socket *so;
105 {
106 
107 	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
108 		return;
109 	if (so->so_head) {
110 		if (!soqremque(so, 0) && !soqremque(so, 1))
111 			panic("sofree dq");
112 		so->so_head = 0;
113 	}
114 	sbrelease(&so->so_snd);
115 	sorflush(so);
116 	FREE(so, M_SOCKET);
117 }
118 
119 /*
120  * Close a socket on last file table reference removal.
121  * Initiate disconnect if connected.
122  * Free socket when disconnect complete.
123  */
124 soclose(so)
125 	register struct socket *so;
126 {
127 	int s = splnet();		/* conservative */
128 	int error = 0;
129 
130 	if (so->so_options & SO_ACCEPTCONN) {
131 		while (so->so_q0)
132 			(void) soabort(so->so_q0);
133 		while (so->so_q)
134 			(void) soabort(so->so_q);
135 	}
136 	if (so->so_pcb == 0)
137 		goto discard;
138 	if (so->so_state & SS_ISCONNECTED) {
139 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
140 			error = sodisconnect(so);
141 			if (error)
142 				goto drop;
143 		}
144 		if (so->so_options & SO_LINGER) {
145 			if ((so->so_state & SS_ISDISCONNECTING) &&
146 			    (so->so_state & SS_NBIO))
147 				goto drop;
148 			while (so->so_state & SS_ISCONNECTED)
149 				if (error = tsleep((caddr_t)&so->so_timeo,
150 				    PSOCK | PCATCH, netcls, so->so_linger))
151 					break;
152 		}
153 	}
154 drop:
155 	if (so->so_pcb) {
156 		int error2 =
157 		    (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
158 			(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
159 		if (error == 0)
160 			error = error2;
161 	}
162 discard:
163 	if (so->so_state & SS_NOFDREF)
164 		panic("soclose: NOFDREF");
165 	so->so_state |= SS_NOFDREF;
166 	sofree(so);
167 	splx(s);
168 	return (error);
169 }
170 
171 /*
172  * Must be called at splnet...
173  */
174 soabort(so)
175 	struct socket *so;
176 {
177 
178 	return (
179 	    (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
180 		(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
181 }
182 
183 soaccept(so, nam)
184 	register struct socket *so;
185 	struct mbuf *nam;
186 {
187 	int s = splnet();
188 	int error;
189 
190 	if ((so->so_state & SS_NOFDREF) == 0)
191 		panic("soaccept: !NOFDREF");
192 	so->so_state &= ~SS_NOFDREF;
193 	error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
194 	    (struct mbuf *)0, nam, (struct mbuf *)0);
195 	splx(s);
196 	return (error);
197 }
198 
199 soconnect(so, nam)
200 	register struct socket *so;
201 	struct mbuf *nam;
202 {
203 	int s;
204 	int error;
205 
206 	if (so->so_options & SO_ACCEPTCONN)
207 		return (EOPNOTSUPP);
208 	s = splnet();
209 	/*
210 	 * If protocol is connection-based, can only connect once.
211 	 * Otherwise, if connected, try to disconnect first.
212 	 * This allows user to disconnect by connecting to, e.g.,
213 	 * a null address.
214 	 */
215 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
216 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
217 	    (error = sodisconnect(so))))
218 		error = EISCONN;
219 	else
220 		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
221 		    (struct mbuf *)0, nam, (struct mbuf *)0);
222 	splx(s);
223 	return (error);
224 }
225 
226 soconnect2(so1, so2)
227 	register struct socket *so1;
228 	struct socket *so2;
229 {
230 	int s = splnet();
231 	int error;
232 
233 	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
234 	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
235 	splx(s);
236 	return (error);
237 }
238 
239 sodisconnect(so)
240 	register struct socket *so;
241 {
242 	int s = splnet();
243 	int error;
244 
245 	if ((so->so_state & SS_ISCONNECTED) == 0) {
246 		error = ENOTCONN;
247 		goto bad;
248 	}
249 	if (so->so_state & SS_ISDISCONNECTING) {
250 		error = EALREADY;
251 		goto bad;
252 	}
253 	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
254 	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
255 bad:
256 	splx(s);
257 	return (error);
258 }
259 
260 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
261 /*
262  * Send on a socket.
263  * If send must go all at once and message is larger than
264  * send buffering, then hard error.
265  * Lock against other senders.
266  * If must go all at once and not enough room now, then
267  * inform user that this would block and do nothing.
268  * Otherwise, if nonblocking, send as much as possible.
269  * The data to be sent is described by "uio" if nonzero,
270  * otherwise by the mbuf chain "top" (which must be null
271  * if uio is not).  Data provided in mbuf chain must be small
272  * enough to send all at once.
273  *
274  * Returns nonzero on error, timeout or signal; callers
275  * must check for short counts if EINTR/ERESTART are returned.
276  * Data and control buffers are freed on return.
277  */
278 sosend(so, addr, uio, top, control, flags)
279 	register struct socket *so;
280 	struct mbuf *addr;
281 	struct uio *uio;
282 	struct mbuf *top;
283 	struct mbuf *control;
284 	int flags;
285 {
286 	struct proc *p = curproc;		/* XXX */
287 	struct mbuf **mp;
288 	register struct mbuf *m;
289 	register long space, len, resid;
290 	int clen = 0, error, s, dontroute, mlen;
291 	int atomic = sosendallatonce(so) || top;
292 
293 	if (uio)
294 		resid = uio->uio_resid;
295 	else
296 		resid = top->m_pkthdr.len;
297 	dontroute =
298 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
299 	    (so->so_proto->pr_flags & PR_ATOMIC);
300 	p->p_stats->p_ru.ru_msgsnd++;
301 	if (control)
302 		clen = control->m_len;
303 #define	snderr(errno)	{ error = errno; splx(s); goto release; }
304 
305 restart:
306 	if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))
307 		goto out;
308 	do {
309 		s = splnet();
310 		if (so->so_state & SS_CANTSENDMORE)
311 			snderr(EPIPE);
312 		if (so->so_error)
313 			snderr(so->so_error);
314 		if ((so->so_state & SS_ISCONNECTED) == 0) {
315 			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
316 				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
317 				    !(resid == 0 && clen != 0))
318 					snderr(ENOTCONN);
319 			} else if (addr == 0)
320 				snderr(EDESTADDRREQ);
321 		}
322 		space = sbspace(&so->so_snd);
323 		if (flags & MSG_OOB)
324 			space += 1024;
325 		if (atomic && resid > so->so_snd.sb_hiwat ||
326 		    clen > so->so_snd.sb_hiwat)
327 			snderr(EMSGSIZE);
328 		if (space < resid + clen && uio &&
329 		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
330 			if (so->so_state & SS_NBIO)
331 				snderr(EWOULDBLOCK);
332 			sbunlock(&so->so_snd);
333 			error = sbwait(&so->so_snd);
334 			splx(s);
335 			if (error)
336 				goto out;
337 			goto restart;
338 		}
339 		splx(s);
340 		mp = &top;
341 		space -= clen;
342 		do {
343 		    if (uio == NULL) {
344 			/*
345 			 * Data is prepackaged in "top".
346 			 */
347 			resid = 0;
348 			if (flags & MSG_EOR)
349 				top->m_flags |= M_EOR;
350 		    } else do {
351 			if (top == 0) {
352 				MGETHDR(m, M_WAIT, MT_DATA);
353 				mlen = MHLEN;
354 				m->m_pkthdr.len = 0;
355 				m->m_pkthdr.rcvif = (struct ifnet *)0;
356 			} else {
357 				MGET(m, M_WAIT, MT_DATA);
358 				mlen = MLEN;
359 			}
360 			if (resid >= MINCLSIZE && space >= MCLBYTES) {
361 				MCLGET(m, M_WAIT);
362 				if ((m->m_flags & M_EXT) == 0)
363 					goto nopages;
364 				mlen = MCLBYTES;
365 #ifdef	MAPPED_MBUFS
366 				len = min(MCLBYTES, resid);
367 #else
368 				if (top == 0) {
369 					len = min(MCLBYTES - max_hdr, resid);
370 					m->m_data += max_hdr;
371 				} else
372 					len = min(MCLBYTES, resid);
373 #endif
374 				space -= MCLBYTES;
375 			} else {
376 nopages:
377 				len = min(min(mlen, resid), space);
378 				space -= len;
379 				/*
380 				 * For datagram protocols, leave room
381 				 * for protocol headers in first mbuf.
382 				 */
383 				if (atomic && top == 0 && len < mlen)
384 					MH_ALIGN(m, len);
385 			}
386 			error = uiomove(mtod(m, caddr_t), (int)len, uio);
387 			resid = uio->uio_resid;
388 			m->m_len = len;
389 			*mp = m;
390 			top->m_pkthdr.len += len;
391 			if (error)
392 				goto release;
393 			mp = &m->m_next;
394 			if (resid <= 0) {
395 				if (flags & MSG_EOR)
396 					top->m_flags |= M_EOR;
397 				break;
398 			}
399 		    } while (space > 0 && atomic);
400 		    if (dontroute)
401 			    so->so_options |= SO_DONTROUTE;
402 		    s = splnet();				/* XXX */
403 		    error = (*so->so_proto->pr_usrreq)(so,
404 			(flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
405 			top, addr, control);
406 		    splx(s);
407 		    if (dontroute)
408 			    so->so_options &= ~SO_DONTROUTE;
409 		    clen = 0;
410 		    control = 0;
411 		    top = 0;
412 		    mp = &top;
413 		    if (error)
414 			goto release;
415 		} while (resid && space > 0);
416 	} while (resid);
417 
418 release:
419 	sbunlock(&so->so_snd);
420 out:
421 	if (top)
422 		m_freem(top);
423 	if (control)
424 		m_freem(control);
425 	return (error);
426 }
427 
428 /*
429  * Implement receive operations on a socket.
430  * We depend on the way that records are added to the sockbuf
431  * by sbappend*.  In particular, each record (mbufs linked through m_next)
432  * must begin with an address if the protocol so specifies,
433  * followed by an optional mbuf or mbufs containing ancillary data,
434  * and then zero or more mbufs of data.
435  * In order to avoid blocking network interrupts for the entire time here,
436  * we splx() while doing the actual copy to user space.
437  * Although the sockbuf is locked, new data may still be appended,
438  * and thus we must maintain consistency of the sockbuf during that time.
439  *
440  * The caller may receive the data as a single mbuf chain by supplying
441  * an mbuf **mp0 for use in returning the chain.  The uio is then used
442  * only for the count in uio_resid.
443  */
444 soreceive(so, paddr, uio, mp0, controlp, flagsp)
445 	register struct socket *so;
446 	struct mbuf **paddr;
447 	struct uio *uio;
448 	struct mbuf **mp0;
449 	struct mbuf **controlp;
450 	int *flagsp;
451 {
452 	register struct mbuf *m, **mp;
453 	register int flags, len, error, s, offset;
454 	struct protosw *pr = so->so_proto;
455 	struct mbuf *nextrecord;
456 	int moff, type;
457 
458 	mp = mp0;
459 	if (paddr)
460 		*paddr = 0;
461 	if (controlp)
462 		*controlp = 0;
463 	if (flagsp)
464 		flags = *flagsp &~ MSG_EOR;
465 	else
466 		flags = 0;
467 	if (flags & MSG_OOB) {
468 		m = m_get(M_WAIT, MT_DATA);
469 		error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
470 		    m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
471 		if (error)
472 			goto bad;
473 		do {
474 			error = uiomove(mtod(m, caddr_t),
475 			    (int) min(uio->uio_resid, m->m_len), uio);
476 			m = m_free(m);
477 		} while (uio->uio_resid && error == 0 && m);
478 bad:
479 		if (m)
480 			m_freem(m);
481 		return (error);
482 	}
483 	if (mp)
484 		*mp = (struct mbuf *)0;
485 	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
486 		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
487 		    (struct mbuf *)0, (struct mbuf *)0);
488 
489 restart:
490 	if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
491 		return (error);
492 	s = splnet();
493 
494 	m = so->so_rcv.sb_mb;
495 	/*
496 	 * If we have less data than requested, block awaiting more
497 	 * (subject to any timeout) if:
498 	 *   1. the current count is less than the low water mark, or
499 	 *   2. MSG_WAITALL is set, and it is possible to do the entire
500 	 *	receive operation at once if we block (resid <= hiwat).
501 	 *   3. MSG_DONTWAIT is not set
502 	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
503 	 * we have to do the receive in sections, and thus risk returning
504 	 * a short count if a timeout or signal occurs after we start.
505 	 */
506 	if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
507 	    so->so_rcv.sb_cc < uio->uio_resid) &&
508 	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
509 	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
510 	    m->m_nextpkt == 0) {
511 #ifdef DIAGNOSTIC
512 		if (m == 0 && so->so_rcv.sb_cc)
513 			panic("receive 1");
514 #endif
515 		if (so->so_error) {
516 			if (m)
517 				goto dontblock;
518 			error = so->so_error;
519 			if ((flags & MSG_PEEK) == 0)
520 				so->so_error = 0;
521 			goto release;
522 		}
523 		if (so->so_state & SS_CANTRCVMORE) {
524 			if (m)
525 				goto dontblock;
526 			else
527 				goto release;
528 		}
529 		for (; m; m = m->m_next)
530 			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
531 				m = so->so_rcv.sb_mb;
532 				goto dontblock;
533 			}
534 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
535 		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
536 			error = ENOTCONN;
537 			goto release;
538 		}
539 		if (uio->uio_resid == 0)
540 			goto release;
541 		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
542 			error = EWOULDBLOCK;
543 			goto release;
544 		}
545 		sbunlock(&so->so_rcv);
546 		error = sbwait(&so->so_rcv);
547 		splx(s);
548 		if (error)
549 			return (error);
550 		goto restart;
551 	}
552 dontblock:
553 	if (uio->uio_procp)
554 		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
555 	nextrecord = m->m_nextpkt;
556 	if (pr->pr_flags & PR_ADDR) {
557 #ifdef DIAGNOSTIC
558 		if (m->m_type != MT_SONAME)
559 			panic("receive 1a");
560 #endif
561 		if (flags & MSG_PEEK) {
562 			if (paddr)
563 				*paddr = m_copy(m, 0, m->m_len);
564 			m = m->m_next;
565 		} else {
566 			sbfree(&so->so_rcv, m);
567 			if (paddr) {
568 				*paddr = m;
569 				so->so_rcv.sb_mb = m->m_next;
570 				m->m_next = 0;
571 				m = so->so_rcv.sb_mb;
572 			} else {
573 				MFREE(m, so->so_rcv.sb_mb);
574 				m = so->so_rcv.sb_mb;
575 			}
576 		}
577 	}
578 	while (m && m->m_type == MT_CONTROL && error == 0) {
579 		if (flags & MSG_PEEK) {
580 			if (controlp)
581 				*controlp = m_copy(m, 0, m->m_len);
582 			m = m->m_next;
583 		} else {
584 			sbfree(&so->so_rcv, m);
585 			if (controlp) {
586 				if (pr->pr_domain->dom_externalize &&
587 				    mtod(m, struct cmsghdr *)->cmsg_type ==
588 				    SCM_RIGHTS)
589 				   error = (*pr->pr_domain->dom_externalize)(m);
590 				*controlp = m;
591 				so->so_rcv.sb_mb = m->m_next;
592 				m->m_next = 0;
593 				m = so->so_rcv.sb_mb;
594 			} else {
595 				MFREE(m, so->so_rcv.sb_mb);
596 				m = so->so_rcv.sb_mb;
597 			}
598 		}
599 		if (controlp)
600 			controlp = &(*controlp)->m_next;
601 	}
602 	if (m) {
603 		if ((flags & MSG_PEEK) == 0)
604 			m->m_nextpkt = nextrecord;
605 		type = m->m_type;
606 		if (type == MT_OOBDATA)
607 			flags |= MSG_OOB;
608 	}
609 	moff = 0;
610 	offset = 0;
611 	while (m && uio->uio_resid > 0 && error == 0) {
612 		if (m->m_type == MT_OOBDATA) {
613 			if (type != MT_OOBDATA)
614 				break;
615 		} else if (type == MT_OOBDATA)
616 			break;
617 #ifdef DIAGNOSTIC
618 		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
619 			panic("receive 3");
620 #endif
621 		so->so_state &= ~SS_RCVATMARK;
622 		len = uio->uio_resid;
623 		if (so->so_oobmark && len > so->so_oobmark - offset)
624 			len = so->so_oobmark - offset;
625 		if (len > m->m_len - moff)
626 			len = m->m_len - moff;
627 		/*
628 		 * If mp is set, just pass back the mbufs.
629 		 * Otherwise copy them out via the uio, then free.
630 		 * Sockbuf must be consistent here (points to current mbuf,
631 		 * it points to next record) when we drop priority;
632 		 * we must note any additions to the sockbuf when we
633 		 * block interrupts again.
634 		 */
635 		if (mp == 0) {
636 			splx(s);
637 			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
638 			s = splnet();
639 		} else
640 			uio->uio_resid -= len;
641 		if (len == m->m_len - moff) {
642 			if (m->m_flags & M_EOR)
643 				flags |= MSG_EOR;
644 			if (flags & MSG_PEEK) {
645 				m = m->m_next;
646 				moff = 0;
647 			} else {
648 				nextrecord = m->m_nextpkt;
649 				sbfree(&so->so_rcv, m);
650 				if (mp) {
651 					*mp = m;
652 					mp = &m->m_next;
653 					so->so_rcv.sb_mb = m = m->m_next;
654 					*mp = (struct mbuf *)0;
655 				} else {
656 					MFREE(m, so->so_rcv.sb_mb);
657 					m = so->so_rcv.sb_mb;
658 				}
659 				if (m)
660 					m->m_nextpkt = nextrecord;
661 			}
662 		} else {
663 			if (flags & MSG_PEEK)
664 				moff += len;
665 			else {
666 				if (mp)
667 					*mp = m_copym(m, 0, len, M_WAIT);
668 				m->m_data += len;
669 				m->m_len -= len;
670 				so->so_rcv.sb_cc -= len;
671 			}
672 		}
673 		if (so->so_oobmark) {
674 			if ((flags & MSG_PEEK) == 0) {
675 				so->so_oobmark -= len;
676 				if (so->so_oobmark == 0) {
677 					so->so_state |= SS_RCVATMARK;
678 					break;
679 				}
680 			} else
681 				offset += len;
682 		}
683 		if (flags & MSG_EOR)
684 			break;
685 		/*
686 		 * If the MSG_WAITALL flag is set (for non-atomic socket),
687 		 * we must not quit until "uio->uio_resid == 0" or an error
688 		 * termination.  If a signal/timeout occurs, return
689 		 * with a short count but without error.
690 		 * Keep sockbuf locked against other readers.
691 		 */
692 		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
693 		    !sosendallatonce(so)) {
694 			if (so->so_error || so->so_state & SS_CANTRCVMORE)
695 				break;
696 			error = sbwait(&so->so_rcv);
697 			if (error) {
698 				sbunlock(&so->so_rcv);
699 				splx(s);
700 				return (0);
701 			}
702 			if (m = so->so_rcv.sb_mb)
703 				nextrecord = m->m_nextpkt;
704 		}
705 	}
706 	if ((flags & MSG_PEEK) == 0) {
707 		if (m == 0)
708 			so->so_rcv.sb_mb = nextrecord;
709 		else if (pr->pr_flags & PR_ATOMIC) {
710 			flags |= MSG_TRUNC;
711 			(void) sbdroprecord(&so->so_rcv);
712 		}
713 		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
714 			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
715 			    (struct mbuf *)flags, (struct mbuf *)0,
716 			    (struct mbuf *)0);
717 	}
718 	if (flagsp)
719 		*flagsp |= flags;
720 release:
721 	sbunlock(&so->so_rcv);
722 	splx(s);
723 	return (error);
724 }
725 
726 soshutdown(so, how)
727 	register struct socket *so;
728 	register int how;
729 {
730 	register struct protosw *pr = so->so_proto;
731 
732 	how++;
733 	if (how & FREAD)
734 		sorflush(so);
735 	if (how & FWRITE)
736 		return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
737 		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
738 	return (0);
739 }
740 
741 sorflush(so)
742 	register struct socket *so;
743 {
744 	register struct sockbuf *sb = &so->so_rcv;
745 	register struct protosw *pr = so->so_proto;
746 	register int s;
747 	struct sockbuf asb;
748 
749 	sb->sb_flags |= SB_NOINTR;
750 	(void) sblock(sb, M_WAITOK);
751 	s = splimp();
752 	socantrcvmore(so);
753 	sbunlock(sb);
754 	asb = *sb;
755 	bzero((caddr_t)sb, sizeof (*sb));
756 	splx(s);
757 	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
758 		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
759 	sbrelease(&asb);
760 }
761 
762 sosetopt(so, level, optname, m0)
763 	register struct socket *so;
764 	int level, optname;
765 	struct mbuf *m0;
766 {
767 	int error = 0;
768 	register struct mbuf *m = m0;
769 
770 	if (level != SOL_SOCKET) {
771 		if (so->so_proto && so->so_proto->pr_ctloutput)
772 			return ((*so->so_proto->pr_ctloutput)
773 				  (PRCO_SETOPT, so, level, optname, &m0));
774 		error = ENOPROTOOPT;
775 	} else {
776 		switch (optname) {
777 
778 		case SO_LINGER:
779 			if (m == NULL || m->m_len != sizeof (struct linger)) {
780 				error = EINVAL;
781 				goto bad;
782 			}
783 			so->so_linger = mtod(m, struct linger *)->l_linger;
784 			/* fall thru... */
785 
786 		case SO_DEBUG:
787 		case SO_KEEPALIVE:
788 		case SO_DONTROUTE:
789 		case SO_USELOOPBACK:
790 		case SO_BROADCAST:
791 		case SO_REUSEADDR:
792 		case SO_OOBINLINE:
793 			if (m == NULL || m->m_len < sizeof (int)) {
794 				error = EINVAL;
795 				goto bad;
796 			}
797 			if (*mtod(m, int *))
798 				so->so_options |= optname;
799 			else
800 				so->so_options &= ~optname;
801 			break;
802 
803 		case SO_SNDBUF:
804 		case SO_RCVBUF:
805 		case SO_SNDLOWAT:
806 		case SO_RCVLOWAT:
807 			if (m == NULL || m->m_len < sizeof (int)) {
808 				error = EINVAL;
809 				goto bad;
810 			}
811 			switch (optname) {
812 
813 			case SO_SNDBUF:
814 			case SO_RCVBUF:
815 				if (sbreserve(optname == SO_SNDBUF ?
816 				    &so->so_snd : &so->so_rcv,
817 				    (u_long) *mtod(m, int *)) == 0) {
818 					error = ENOBUFS;
819 					goto bad;
820 				}
821 				break;
822 
823 			case SO_SNDLOWAT:
824 				so->so_snd.sb_lowat = *mtod(m, int *);
825 				break;
826 			case SO_RCVLOWAT:
827 				so->so_rcv.sb_lowat = *mtod(m, int *);
828 				break;
829 			}
830 			break;
831 
832 		case SO_SNDTIMEO:
833 		case SO_RCVTIMEO:
834 		    {
835 			struct timeval *tv;
836 			short val;
837 
838 			if (m == NULL || m->m_len < sizeof (*tv)) {
839 				error = EINVAL;
840 				goto bad;
841 			}
842 			tv = mtod(m, struct timeval *);
843 			if (tv->tv_sec > SHRT_MAX / hz - hz) {
844 				error = EDOM;
845 				goto bad;
846 			}
847 			val = tv->tv_sec * hz + tv->tv_usec / tick;
848 
849 			switch (optname) {
850 
851 			case SO_SNDTIMEO:
852 				so->so_snd.sb_timeo = val;
853 				break;
854 			case SO_RCVTIMEO:
855 				so->so_rcv.sb_timeo = val;
856 				break;
857 			}
858 			break;
859 		    }
860 
861 		default:
862 			error = ENOPROTOOPT;
863 			break;
864 		}
865 		m = 0;
866 		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput)
867 			(void) ((*so->so_proto->pr_ctloutput)
868 				  (PRCO_SETOPT, so, level, optname, &m0));
869 	}
870 bad:
871 	if (m)
872 		(void) m_free(m);
873 	return (error);
874 }
875 
876 sogetopt(so, level, optname, mp)
877 	register struct socket *so;
878 	int level, optname;
879 	struct mbuf **mp;
880 {
881 	register struct mbuf *m;
882 
883 	if (level != SOL_SOCKET) {
884 		if (so->so_proto && so->so_proto->pr_ctloutput) {
885 			return ((*so->so_proto->pr_ctloutput)
886 				  (PRCO_GETOPT, so, level, optname, mp));
887 		} else
888 			return (ENOPROTOOPT);
889 	} else {
890 		m = m_get(M_WAIT, MT_SOOPTS);
891 		m->m_len = sizeof (int);
892 
893 		switch (optname) {
894 
895 		case SO_LINGER:
896 			m->m_len = sizeof (struct linger);
897 			mtod(m, struct linger *)->l_onoff =
898 				so->so_options & SO_LINGER;
899 			mtod(m, struct linger *)->l_linger = so->so_linger;
900 			break;
901 
902 		case SO_USELOOPBACK:
903 		case SO_DONTROUTE:
904 		case SO_DEBUG:
905 		case SO_KEEPALIVE:
906 		case SO_REUSEADDR:
907 		case SO_BROADCAST:
908 		case SO_OOBINLINE:
909 			*mtod(m, int *) = so->so_options & optname;
910 			break;
911 
912 		case SO_TYPE:
913 			*mtod(m, int *) = so->so_type;
914 			break;
915 
916 		case SO_ERROR:
917 			*mtod(m, int *) = so->so_error;
918 			so->so_error = 0;
919 			break;
920 
921 		case SO_SNDBUF:
922 			*mtod(m, int *) = so->so_snd.sb_hiwat;
923 			break;
924 
925 		case SO_RCVBUF:
926 			*mtod(m, int *) = so->so_rcv.sb_hiwat;
927 			break;
928 
929 		case SO_SNDLOWAT:
930 			*mtod(m, int *) = so->so_snd.sb_lowat;
931 			break;
932 
933 		case SO_RCVLOWAT:
934 			*mtod(m, int *) = so->so_rcv.sb_lowat;
935 			break;
936 
937 		case SO_SNDTIMEO:
938 		case SO_RCVTIMEO:
939 		    {
940 			int val = (optname == SO_SNDTIMEO ?
941 			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
942 
943 			m->m_len = sizeof(struct timeval);
944 			mtod(m, struct timeval *)->tv_sec = val / hz;
945 			mtod(m, struct timeval *)->tv_usec =
946 			    (val % hz) / tick;
947 			break;
948 		    }
949 
950 		default:
951 			(void)m_free(m);
952 			return (ENOPROTOOPT);
953 		}
954 		*mp = m;
955 		return (0);
956 	}
957 }
958 
959 sohasoutofband(so)
960 	register struct socket *so;
961 {
962 	struct proc *p;
963 
964 	if (so->so_pgid < 0)
965 		gsignal(-so->so_pgid, SIGURG);
966 	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
967 		psignal(p, SIGURG);
968 	selwakeup(&so->so_rcv.sb_sel);
969 }
970