/* xref: /original-bsd/sys/kern/uipc_socket.c (revision 7e5c8007) */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 04/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
30 /*ARGSUSED*/
31 socreate(dom, aso, type, proto)
32 	int dom;
33 	struct socket **aso;
34 	register int type;
35 	int proto;
36 {
37 	struct proc *p = curproc;		/* XXX */
38 	register struct protosw *prp;
39 	register struct socket *so;
40 	register int error;
41 
42 	if (proto)
43 		prp = pffindproto(dom, proto, type);
44 	else
45 		prp = pffindtype(dom, type);
46 	if (prp == 0 || prp->pr_usrreq == 0)
47 		return (EPROTONOSUPPORT);
48 	if (prp->pr_type != type)
49 		return (EPROTOTYPE);
50 	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT);
51 	bzero((caddr_t)so, sizeof(*so));
52 	so->so_type = type;
53 	if (p->p_ucred->cr_uid == 0)
54 		so->so_state = SS_PRIV;
55 	so->so_proto = prp;
56 	error =
57 	    (*prp->pr_usrreq)(so, PRU_ATTACH,
58 		(struct mbuf *)0, (struct mbuf *)proto, (struct mbuf *)0);
59 	if (error) {
60 		so->so_state |= SS_NOFDREF;
61 		sofree(so);
62 		return (error);
63 	}
64 	*aso = so;
65 	return (0);
66 }
67 
68 sobind(so, nam)
69 	struct socket *so;
70 	struct mbuf *nam;
71 {
72 	int s = splnet();
73 	int error;
74 
75 	error =
76 	    (*so->so_proto->pr_usrreq)(so, PRU_BIND,
77 		(struct mbuf *)0, nam, (struct mbuf *)0);
78 	splx(s);
79 	return (error);
80 }
81 
82 solisten(so, backlog)
83 	register struct socket *so;
84 	int backlog;
85 {
86 	int s = splnet(), error;
87 
88 	error =
89 	    (*so->so_proto->pr_usrreq)(so, PRU_LISTEN,
90 		(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
91 	if (error) {
92 		splx(s);
93 		return (error);
94 	}
95 	if (so->so_q == 0)
96 		so->so_options |= SO_ACCEPTCONN;
97 	if (backlog < 0)
98 		backlog = 0;
99 	so->so_qlimit = min(backlog, SOMAXCONN);
100 	splx(s);
101 	return (0);
102 }
103 
/*
 * Release a socket that is no longer referenced by either a protocol
 * control block or a file descriptor.  Called from soclose() and by
 * protocols when a detach completes.
 */
sofree(so)
	register struct socket *so;
{

	/* Still attached to a pcb, or still owned by a descriptor: keep it. */
	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * An un-accepted connection: unlink it from the listener's
		 * incomplete (q0) or completed (q) queue before freeing.
		 */
		if (!soqremque(so, 0) && !soqremque(so, 1))
			panic("sofree dq");
		so->so_head = 0;
	}
	sbrelease(&so->so_snd);
	sorflush(so);		/* flushes and releases the receive buffer */
	FREE(so, M_SOCKET);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
soclose(so)
	register struct socket *so;
{
	int s = splnet();		/* conservative */
	int error = 0;

	/* A listening socket aborts any connections still queued on it. */
	if (so->so_options & SO_ACCEPTCONN) {
		while (so->so_q0)
			(void) soabort(so->so_q0);
		while (so->so_q)
			(void) soabort(so->so_q);
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			/*
			 * Linger: wait for the disconnect to complete
			 * (sleep bounded by so_linger, interruptible),
			 * unless the socket is non-blocking.
			 */
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED)
				if (error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls, so->so_linger))
					break;
		}
	}
drop:
	if (so->so_pcb) {
		/* Detach regardless; report the first error encountered. */
		int error2 =
		    (*so->so_proto->pr_usrreq)(so, PRU_DETACH,
			(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;	/* last descriptor ref is gone */
	sofree(so);
	splx(s);
	return (error);
}

172 /*
173  * Must be called at splnet...
174  */
175 soabort(so)
176 	struct socket *so;
177 {
178 
179 	return (
180 	    (*so->so_proto->pr_usrreq)(so, PRU_ABORT,
181 		(struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
182 }
183 
184 soaccept(so, nam)
185 	register struct socket *so;
186 	struct mbuf *nam;
187 {
188 	int s = splnet();
189 	int error;
190 
191 	if ((so->so_state & SS_NOFDREF) == 0)
192 		panic("soaccept: !NOFDREF");
193 	so->so_state &= ~SS_NOFDREF;
194 	error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT,
195 	    (struct mbuf *)0, nam, (struct mbuf *)0);
196 	splx(s);
197 	return (error);
198 }
199 
200 soconnect(so, nam)
201 	register struct socket *so;
202 	struct mbuf *nam;
203 {
204 	int s;
205 	int error;
206 
207 	if (so->so_options & SO_ACCEPTCONN)
208 		return (EOPNOTSUPP);
209 	s = splnet();
210 	/*
211 	 * If protocol is connection-based, can only connect once.
212 	 * Otherwise, if connected, try to disconnect first.
213 	 * This allows user to disconnect by connecting to, e.g.,
214 	 * a null address.
215 	 */
216 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
217 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
218 	    (error = sodisconnect(so))))
219 		error = EISCONN;
220 	else
221 		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
222 		    (struct mbuf *)0, nam, (struct mbuf *)0);
223 	splx(s);
224 	return (error);
225 }
226 
227 soconnect2(so1, so2)
228 	register struct socket *so1;
229 	struct socket *so2;
230 {
231 	int s = splnet();
232 	int error;
233 
234 	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2,
235 	    (struct mbuf *)0, (struct mbuf *)so2, (struct mbuf *)0);
236 	splx(s);
237 	return (error);
238 }
239 
240 sodisconnect(so)
241 	register struct socket *so;
242 {
243 	int s = splnet();
244 	int error;
245 
246 	if ((so->so_state & SS_ISCONNECTED) == 0) {
247 		error = ENOTCONN;
248 		goto bad;
249 	}
250 	if (so->so_state & SS_ISDISCONNECTING) {
251 		error = EALREADY;
252 		goto bad;
253 	}
254 	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
255 	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0);
256 bad:
257 	splx(s);
258 	return (error);
259 }
260 
#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	register struct mbuf *m;
	register long space, len, resid;
	int clen = 0, error, s, dontroute, mlen;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 */
	if (resid < 0)
		return (EINVAL);
	/* Per-message MSG_DONTROUTE only makes sense for atomic protocols. */
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	if (error = sblock(&so->so_snd, SBLOCKWAIT(flags)))
		goto out;
	do {
		s = splnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error)
			snderr(so->so_error);
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			/*
			 * Connection-required protocols may still send
			 * control-only data while confirming; others
			 * need an explicit destination address.
			 */
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;	/* allow slop for OOB data */
		if (atomic && resid > so->so_snd.sb_hiwat ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		/* Not enough room: block (or fail if non-blocking). */
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
		    if (uio == NULL) {
			/*
			 * Data is prepackaged in "top".
			 */
			resid = 0;
			if (flags & MSG_EOR)
				top->m_flags |= M_EOR;
		    } else do {
			/* Build the chain, copying user data into mbufs. */
			if (top == 0) {
				MGETHDR(m, M_WAIT, MT_DATA);
				mlen = MHLEN;
				m->m_pkthdr.len = 0;
				m->m_pkthdr.rcvif = (struct ifnet *)0;
			} else {
				MGET(m, M_WAIT, MT_DATA);
				mlen = MLEN;
			}
			if (resid >= MINCLSIZE && space >= MCLBYTES) {
				MCLGET(m, M_WAIT);
				if ((m->m_flags & M_EXT) == 0)
					goto nopages;
				mlen = MCLBYTES;
#ifdef	MAPPED_MBUFS
				len = min(MCLBYTES, resid);
#else
				if (atomic && top == 0) {
					len = min(MCLBYTES - max_hdr, resid);
					m->m_data += max_hdr;
				} else
					len = min(MCLBYTES, resid);
#endif
				space -= MCLBYTES;
			} else {
nopages:
				len = min(min(mlen, resid), space);
				space -= len;
				/*
				 * For datagram protocols, leave room
				 * for protocol headers in first mbuf.
				 */
				if (atomic && top == 0 && len < mlen)
					MH_ALIGN(m, len);
			}
			error = uiomove(mtod(m, caddr_t), (int)len, uio);
			resid = uio->uio_resid;
			m->m_len = len;
			*mp = m;
			top->m_pkthdr.len += len;
			if (error)
				goto release;
			mp = &m->m_next;
			if (resid <= 0) {
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
				break;
			}
		    } while (space > 0 && atomic);
		    if (dontroute)
			    so->so_options |= SO_DONTROUTE;
		    s = splnet();				/* XXX */
		    /* Hand the assembled chain to the protocol. */
		    error = (*so->so_proto->pr_usrreq)(so,
			(flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			top, addr, control);
		    splx(s);
		    if (dontroute)
			    so->so_options &= ~SO_DONTROUTE;
		    clen = 0;
		    control = 0;	/* consumed by the protocol */
		    top = 0;
		    mp = &top;
		    if (error)
			goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}

438 /*
439  * Implement receive operations on a socket.
440  * We depend on the way that records are added to the sockbuf
441  * by sbappend*.  In particular, each record (mbufs linked through m_next)
442  * must begin with an address if the protocol so specifies,
443  * followed by an optional mbuf or mbufs containing ancillary data,
444  * and then zero or more mbufs of data.
445  * In order to avoid blocking network interrupts for the entire time here,
446  * we splx() while doing the actual copy to user space.
447  * Although the sockbuf is locked, new data may still be appended,
448  * and thus we must maintain consistency of the sockbuf during that time.
449  *
450  * The caller may receive the data as a single mbuf chain by supplying
451  * an mbuf **mp0 for use in returning the chain.  The uio is then used
452  * only for the count in uio_resid.
453  */
454 soreceive(so, paddr, uio, mp0, controlp, flagsp)
455 	register struct socket *so;
456 	struct mbuf **paddr;
457 	struct uio *uio;
458 	struct mbuf **mp0;
459 	struct mbuf **controlp;
460 	int *flagsp;
461 {
462 	register struct mbuf *m, **mp;
463 	register int flags, len, error, s, offset;
464 	struct protosw *pr = so->so_proto;
465 	struct mbuf *nextrecord;
466 	int moff, type;
467 	int orig_resid = uio->uio_resid;
468 
469 	mp = mp0;
470 	if (paddr)
471 		*paddr = 0;
472 	if (controlp)
473 		*controlp = 0;
474 	if (flagsp)
475 		flags = *flagsp &~ MSG_EOR;
476 	else
477 		flags = 0;
478 	if (flags & MSG_OOB) {
479 		m = m_get(M_WAIT, MT_DATA);
480 		error = (*pr->pr_usrreq)(so, PRU_RCVOOB,
481 		    m, (struct mbuf *)(flags & MSG_PEEK), (struct mbuf *)0);
482 		if (error)
483 			goto bad;
484 		do {
485 			error = uiomove(mtod(m, caddr_t),
486 			    (int) min(uio->uio_resid, m->m_len), uio);
487 			m = m_free(m);
488 		} while (uio->uio_resid && error == 0 && m);
489 bad:
490 		if (m)
491 			m_freem(m);
492 		return (error);
493 	}
494 	if (mp)
495 		*mp = (struct mbuf *)0;
496 	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
497 		(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
498 		    (struct mbuf *)0, (struct mbuf *)0);
499 
500 restart:
501 	if (error = sblock(&so->so_rcv, SBLOCKWAIT(flags)))
502 		return (error);
503 	s = splnet();
504 
505 	m = so->so_rcv.sb_mb;
506 	/*
507 	 * If we have less data than requested, block awaiting more
508 	 * (subject to any timeout) if:
509 	 *   1. the current count is less than the low water mark, or
510 	 *   2. MSG_WAITALL is set, and it is possible to do the entire
511 	 *	receive operation at once if we block (resid <= hiwat).
512 	 *   3. MSG_DONTWAIT is not set
513 	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
514 	 * we have to do the receive in sections, and thus risk returning
515 	 * a short count if a timeout or signal occurs after we start.
516 	 */
517 	if (m == 0 || ((flags & MSG_DONTWAIT) == 0 &&
518 	    so->so_rcv.sb_cc < uio->uio_resid) &&
519 	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
520 	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
521 	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0) {
522 #ifdef DIAGNOSTIC
523 		if (m == 0 && so->so_rcv.sb_cc)
524 			panic("receive 1");
525 #endif
526 		if (so->so_error) {
527 			if (m)
528 				goto dontblock;
529 			error = so->so_error;
530 			if ((flags & MSG_PEEK) == 0)
531 				so->so_error = 0;
532 			goto release;
533 		}
534 		if (so->so_state & SS_CANTRCVMORE) {
535 			if (m)
536 				goto dontblock;
537 			else
538 				goto release;
539 		}
540 		for (; m; m = m->m_next)
541 			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
542 				m = so->so_rcv.sb_mb;
543 				goto dontblock;
544 			}
545 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
546 		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
547 			error = ENOTCONN;
548 			goto release;
549 		}
550 		if (uio->uio_resid == 0)
551 			goto release;
552 		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
553 			error = EWOULDBLOCK;
554 			goto release;
555 		}
556 		sbunlock(&so->so_rcv);
557 		error = sbwait(&so->so_rcv);
558 		splx(s);
559 		if (error)
560 			return (error);
561 		goto restart;
562 	}
563 dontblock:
564 	if (uio->uio_procp)
565 		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
566 	nextrecord = m->m_nextpkt;
567 	if (pr->pr_flags & PR_ADDR) {
568 #ifdef DIAGNOSTIC
569 		if (m->m_type != MT_SONAME)
570 			panic("receive 1a");
571 #endif
572 		orig_resid = 0;
573 		if (flags & MSG_PEEK) {
574 			if (paddr)
575 				*paddr = m_copy(m, 0, m->m_len);
576 			m = m->m_next;
577 		} else {
578 			sbfree(&so->so_rcv, m);
579 			if (paddr) {
580 				*paddr = m;
581 				so->so_rcv.sb_mb = m->m_next;
582 				m->m_next = 0;
583 				m = so->so_rcv.sb_mb;
584 			} else {
585 				MFREE(m, so->so_rcv.sb_mb);
586 				m = so->so_rcv.sb_mb;
587 			}
588 		}
589 	}
590 	while (m && m->m_type == MT_CONTROL && error == 0) {
591 		if (flags & MSG_PEEK) {
592 			if (controlp)
593 				*controlp = m_copy(m, 0, m->m_len);
594 			m = m->m_next;
595 		} else {
596 			sbfree(&so->so_rcv, m);
597 			if (controlp) {
598 				if (pr->pr_domain->dom_externalize &&
599 				    mtod(m, struct cmsghdr *)->cmsg_type ==
600 				    SCM_RIGHTS)
601 				   error = (*pr->pr_domain->dom_externalize)(m);
602 				*controlp = m;
603 				so->so_rcv.sb_mb = m->m_next;
604 				m->m_next = 0;
605 				m = so->so_rcv.sb_mb;
606 			} else {
607 				MFREE(m, so->so_rcv.sb_mb);
608 				m = so->so_rcv.sb_mb;
609 			}
610 		}
611 		if (controlp) {
612 			orig_resid = 0;
613 			controlp = &(*controlp)->m_next;
614 		}
615 	}
616 	if (m) {
617 		if ((flags & MSG_PEEK) == 0)
618 			m->m_nextpkt = nextrecord;
619 		type = m->m_type;
620 		if (type == MT_OOBDATA)
621 			flags |= MSG_OOB;
622 	}
623 	moff = 0;
624 	offset = 0;
625 	while (m && uio->uio_resid > 0 && error == 0) {
626 		if (m->m_type == MT_OOBDATA) {
627 			if (type != MT_OOBDATA)
628 				break;
629 		} else if (type == MT_OOBDATA)
630 			break;
631 #ifdef DIAGNOSTIC
632 		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
633 			panic("receive 3");
634 #endif
635 		so->so_state &= ~SS_RCVATMARK;
636 		len = uio->uio_resid;
637 		if (so->so_oobmark && len > so->so_oobmark - offset)
638 			len = so->so_oobmark - offset;
639 		if (len > m->m_len - moff)
640 			len = m->m_len - moff;
641 		/*
642 		 * If mp is set, just pass back the mbufs.
643 		 * Otherwise copy them out via the uio, then free.
644 		 * Sockbuf must be consistent here (points to current mbuf,
645 		 * it points to next record) when we drop priority;
646 		 * we must note any additions to the sockbuf when we
647 		 * block interrupts again.
648 		 */
649 		if (mp == 0) {
650 			splx(s);
651 			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
652 			s = splnet();
653 		} else
654 			uio->uio_resid -= len;
655 		if (len == m->m_len - moff) {
656 			if (m->m_flags & M_EOR)
657 				flags |= MSG_EOR;
658 			if (flags & MSG_PEEK) {
659 				m = m->m_next;
660 				moff = 0;
661 			} else {
662 				nextrecord = m->m_nextpkt;
663 				sbfree(&so->so_rcv, m);
664 				if (mp) {
665 					*mp = m;
666 					mp = &m->m_next;
667 					so->so_rcv.sb_mb = m = m->m_next;
668 					*mp = (struct mbuf *)0;
669 				} else {
670 					MFREE(m, so->so_rcv.sb_mb);
671 					m = so->so_rcv.sb_mb;
672 				}
673 				if (m)
674 					m->m_nextpkt = nextrecord;
675 			}
676 		} else {
677 			if (flags & MSG_PEEK)
678 				moff += len;
679 			else {
680 				if (mp)
681 					*mp = m_copym(m, 0, len, M_WAIT);
682 				m->m_data += len;
683 				m->m_len -= len;
684 				so->so_rcv.sb_cc -= len;
685 			}
686 		}
687 		if (so->so_oobmark) {
688 			if ((flags & MSG_PEEK) == 0) {
689 				so->so_oobmark -= len;
690 				if (so->so_oobmark == 0) {
691 					so->so_state |= SS_RCVATMARK;
692 					break;
693 				}
694 			} else {
695 				offset += len;
696 				if (offset == so->so_oobmark)
697 					break;
698 			}
699 		}
700 		if (flags & MSG_EOR)
701 			break;
702 		/*
703 		 * If the MSG_WAITALL flag is set (for non-atomic socket),
704 		 * we must not quit until "uio->uio_resid == 0" or an error
705 		 * termination.  If a signal/timeout occurs, return
706 		 * with a short count but without error.
707 		 * Keep sockbuf locked against other readers.
708 		 */
709 		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
710 		    !sosendallatonce(so) && !nextrecord) {
711 			if (so->so_error || so->so_state & SS_CANTRCVMORE)
712 				break;
713 			error = sbwait(&so->so_rcv);
714 			if (error) {
715 				sbunlock(&so->so_rcv);
716 				splx(s);
717 				return (0);
718 			}
719 			if (m = so->so_rcv.sb_mb)
720 				nextrecord = m->m_nextpkt;
721 		}
722 	}
723 
724 	if (m && pr->pr_flags & PR_ATOMIC) {
725 		flags |= MSG_TRUNC;
726 		if ((flags & MSG_PEEK) == 0)
727 			(void) sbdroprecord(&so->so_rcv);
728 	}
729 	if ((flags & MSG_PEEK) == 0) {
730 		if (m == 0)
731 			so->so_rcv.sb_mb = nextrecord;
732 		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
733 			(*pr->pr_usrreq)(so, PRU_RCVD, (struct mbuf *)0,
734 			    (struct mbuf *)flags, (struct mbuf *)0,
735 			    (struct mbuf *)0);
736 	}
737 	if (orig_resid == uio->uio_resid && orig_resid &&
738 	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
739 		sbunlock(&so->so_rcv);
740 		splx(s);
741 		goto restart;
742 	}
743 
744 	if (flagsp)
745 		*flagsp |= flags;
746 release:
747 	sbunlock(&so->so_rcv);
748 	splx(s);
749 	return (error);
750 }
751 
752 soshutdown(so, how)
753 	register struct socket *so;
754 	register int how;
755 {
756 	register struct protosw *pr = so->so_proto;
757 
758 	how++;
759 	if (how & FREAD)
760 		sorflush(so);
761 	if (how & FWRITE)
762 		return ((*pr->pr_usrreq)(so, PRU_SHUTDOWN,
763 		    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0));
764 	return (0);
765 }
766 
/*
 * Flush and release a socket's receive buffer, disposing of any
 * in-transit rights (e.g. passed descriptors) it may hold, and mark
 * the socket unable to receive more data.
 */
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	/* The lock must not be interruptible while we dismantle sb. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	/*
	 * Snapshot the buffer and clear the original at high ipl so
	 * no new data is appended while the old chain is freed below.
	 */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Set a socket option.  SOL_SOCKET-level options are handled here;
 * anything else is passed through to the protocol's ctloutput entry.
 * The option mbuf m0 is consumed (freed) in all cases.
 */
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/*
			 * l_onoff lies first in struct linger, so the
			 * boolean-option code below reads it as the
			 * on/off flag for the SO_LINGER bit.
			 */
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			/* Option name doubles as the so_options bit. */
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) *mtod(m, int *)) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = *mtod(m, int *);
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = *mtod(m, int *);
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;	/* sb_timeo is a short (ticks) */

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* Conservative bound so the tick count fits a short. */
			if (tv->tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Give the protocol a look at socket-level options too. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

904 sogetopt(so, level, optname, mp)
905 	register struct socket *so;
906 	int level, optname;
907 	struct mbuf **mp;
908 {
909 	register struct mbuf *m;
910 
911 	if (level != SOL_SOCKET) {
912 		if (so->so_proto && so->so_proto->pr_ctloutput) {
913 			return ((*so->so_proto->pr_ctloutput)
914 				  (PRCO_GETOPT, so, level, optname, mp));
915 		} else
916 			return (ENOPROTOOPT);
917 	} else {
918 		m = m_get(M_WAIT, MT_SOOPTS);
919 		m->m_len = sizeof (int);
920 
921 		switch (optname) {
922 
923 		case SO_LINGER:
924 			m->m_len = sizeof (struct linger);
925 			mtod(m, struct linger *)->l_onoff =
926 				so->so_options & SO_LINGER;
927 			mtod(m, struct linger *)->l_linger = so->so_linger;
928 			break;
929 
930 		case SO_USELOOPBACK:
931 		case SO_DONTROUTE:
932 		case SO_DEBUG:
933 		case SO_KEEPALIVE:
934 		case SO_REUSEADDR:
935 		case SO_REUSEPORT:
936 		case SO_BROADCAST:
937 		case SO_OOBINLINE:
938 			*mtod(m, int *) = so->so_options & optname;
939 			break;
940 
941 		case SO_TYPE:
942 			*mtod(m, int *) = so->so_type;
943 			break;
944 
945 		case SO_ERROR:
946 			*mtod(m, int *) = so->so_error;
947 			so->so_error = 0;
948 			break;
949 
950 		case SO_SNDBUF:
951 			*mtod(m, int *) = so->so_snd.sb_hiwat;
952 			break;
953 
954 		case SO_RCVBUF:
955 			*mtod(m, int *) = so->so_rcv.sb_hiwat;
956 			break;
957 
958 		case SO_SNDLOWAT:
959 			*mtod(m, int *) = so->so_snd.sb_lowat;
960 			break;
961 
962 		case SO_RCVLOWAT:
963 			*mtod(m, int *) = so->so_rcv.sb_lowat;
964 			break;
965 
966 		case SO_SNDTIMEO:
967 		case SO_RCVTIMEO:
968 		    {
969 			int val = (optname == SO_SNDTIMEO ?
970 			     so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
971 
972 			m->m_len = sizeof(struct timeval);
973 			mtod(m, struct timeval *)->tv_sec = val / hz;
974 			mtod(m, struct timeval *)->tv_usec =
975 			    (val % hz) / tick;
976 			break;
977 		    }
978 
979 		default:
980 			(void)m_free(m);
981 			return (ENOPROTOOPT);
982 		}
983 		*mp = m;
984 		return (0);
985 	}
986 }
987 
988 sohasoutofband(so)
989 	register struct socket *so;
990 {
991 	struct proc *p;
992 
993 	if (so->so_pgid < 0)
994 		gsignal(-so->so_pgid, SIGURG);
995 	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
996 		psignal(p, SIGURG);
997 	selwakeup(&so->so_rcv.sb_sel);
998 }
999