1 /* $OpenBSD: uipc_socket.c,v 1.366 2025/01/27 08:20:56 mvs Exp $ */
2 /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */
3
4 /*
5 * Copyright (c) 1982, 1986, 1988, 1990, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
33 */
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/file.h>
39 #include <sys/filedesc.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/domain.h>
43 #include <sys/event.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/unpcb.h>
47 #include <sys/socketvar.h>
48 #include <sys/signalvar.h>
49 #include <sys/pool.h>
50 #include <sys/atomic.h>
51 #include <sys/rwlock.h>
52 #include <sys/time.h>
53 #include <sys/refcnt.h>
54
55 #ifdef DDB
56 #include <machine/db_machdep.h>
57 #endif
58
59 void sbsync(struct sockbuf *, struct mbuf *);
60
61 int sosplice(struct socket *, int, off_t, struct timeval *);
62 void sounsplice(struct socket *, struct socket *, int);
63 void soidle(void *);
64 void sotask(void *);
65 int somove(struct socket *, int);
66 void sorflush(struct socket *);
67
68 void filt_sordetach(struct knote *kn);
69 int filt_soread(struct knote *kn, long hint);
70 void filt_sowdetach(struct knote *kn);
71 int filt_sowrite(struct knote *kn, long hint);
72 int filt_soexcept(struct knote *kn, long hint);
73
74 int filt_sowmodify(struct kevent *kev, struct knote *kn);
75 int filt_sowprocess(struct knote *kn, struct kevent *kev);
76
77 int filt_sormodify(struct kevent *kev, struct knote *kn);
78 int filt_sorprocess(struct knote *kn, struct kevent *kev);
79
80 const struct filterops soread_filtops = {
81 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
82 .f_attach = NULL,
83 .f_detach = filt_sordetach,
84 .f_event = filt_soread,
85 .f_modify = filt_sormodify,
86 .f_process = filt_sorprocess,
87 };
88
89 const struct filterops sowrite_filtops = {
90 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
91 .f_attach = NULL,
92 .f_detach = filt_sowdetach,
93 .f_event = filt_sowrite,
94 .f_modify = filt_sowmodify,
95 .f_process = filt_sowprocess,
96 };
97
98 const struct filterops soexcept_filtops = {
99 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
100 .f_attach = NULL,
101 .f_detach = filt_sordetach,
102 .f_event = filt_soexcept,
103 .f_modify = filt_sormodify,
104 .f_process = filt_sorprocess,
105 };
106
107 #ifndef SOMINCONN
108 #define SOMINCONN 80
109 #endif /* SOMINCONN */
110
111 int somaxconn = SOMAXCONN;
112 int sominconn = SOMINCONN;
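/*
 * somaxconn caps the listen(2) backlog and sominconn sets its floor;
 * solisten() below clamps the requested backlog into that range. Both
 * are runtime-tunable (via the kern.somaxconn and kern.sominconn
 * sysctls on OpenBSD).
 */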
113
114 struct pool socket_pool;
115 #ifdef SOCKET_SPLICE
116 struct pool sosplice_pool;
117 struct taskq *sosplice_taskq;
118 struct rwlock sosplice_lock = RWLOCK_INITIALIZER("sosplicelk");
119 #endif
120
121 void
122 soinit(void)
123 {
124 pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0,
125 "sockpl", NULL);
126 #ifdef SOCKET_SPLICE
127 pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0,
128 "sosppl", NULL);
129 #endif
130 }
131
132 struct socket *
133 soalloc(const struct protosw *prp, int wait)
134 {
135 const struct domain *dp = prp->pr_domain;
136 const char *dom_name = dp->dom_name;
137 struct socket *so;
138
139 so = pool_get(&socket_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
140 PR_ZERO);
141 if (so == NULL)
142 return (NULL);
143
144 #ifdef WITNESS
145 /*
146 * XXX: Make WITNESS happy. AF_INET and AF_INET6 sockets could be
147 * spliced together.
148 */
149 switch (dp->dom_family) {
150 case AF_INET:
151 case AF_INET6:
152 dom_name = "inet46";
153 break;
154 }
155 #endif
156
157 refcnt_init_trace(&so->so_refcnt, DT_REFCNT_IDX_SOCKET);
158 rw_init_flags(&so->so_lock, dom_name, RWL_DUPOK);
159 rw_init(&so->so_rcv.sb_lock, "sbufrcv");
160 rw_init(&so->so_snd.sb_lock, "sbufsnd");
161 mtx_init_flags(&so->so_rcv.sb_mtx, IPL_MPFLOOR, "sbrcv", 0);
162 mtx_init_flags(&so->so_snd.sb_mtx, IPL_MPFLOOR, "sbsnd", 0);
163 klist_init_mutex(&so->so_rcv.sb_klist, &so->so_rcv.sb_mtx);
164 klist_init_mutex(&so->so_snd.sb_klist, &so->so_snd.sb_mtx);
165 sigio_init(&so->so_sigio);
166 TAILQ_INIT(&so->so_q0);
167 TAILQ_INIT(&so->so_q);
168
169 return (so);
170 }
171
172 /*
173 * Socket operation routines.
174 * These routines are called by the routines in
175 * sys_socket.c or from a system process, and
176 * implement the semantics of socket operations by
177 * switching out to the protocol specific routines.
178 */
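/*
 * For example, a userland socket(2) call reaches this layer roughly as
 * sys_socket() -> socreate() -> soalloc() -> pru_attach(), with the
 * protocol-specific attach hook allocating the PCB.
 */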
179 int
180 socreate(int dom, struct socket **aso, int type, int proto)
181 {
182 struct proc *p = curproc; /* XXX */
183 const struct protosw *prp;
184 struct socket *so;
185 int error;
186
187 if (proto)
188 prp = pffindproto(dom, proto, type);
189 else
190 prp = pffindtype(dom, type);
191 if (prp == NULL || prp->pr_usrreqs == NULL)
192 return (EPROTONOSUPPORT);
193 if (prp->pr_type != type)
194 return (EPROTOTYPE);
195 so = soalloc(prp, M_WAIT);
196 so->so_type = type;
197 if (suser(p) == 0)
198 so->so_state = SS_PRIV;
199 so->so_ruid = p->p_ucred->cr_ruid;
200 so->so_euid = p->p_ucred->cr_uid;
201 so->so_rgid = p->p_ucred->cr_rgid;
202 so->so_egid = p->p_ucred->cr_gid;
203 so->so_cpid = p->p_p->ps_pid;
204 so->so_proto = prp;
205 so->so_snd.sb_timeo_nsecs = INFSLP;
206 so->so_rcv.sb_timeo_nsecs = INFSLP;
207
208 solock(so);
209 error = pru_attach(so, proto, M_WAIT);
210 if (error) {
211 so->so_state |= SS_NOFDREF;
212 /* sofree() calls sounlock(). */
213 sofree(so, 0);
214 return (error);
215 }
216 sounlock(so);
217 *aso = so;
218 return (0);
219 }
220
221 int
222 sobind(struct socket *so, struct mbuf *nam, struct proc *p)
223 {
224 soassertlocked(so);
225 return pru_bind(so, nam, p);
226 }
227
228 int
229 solisten(struct socket *so, int backlog)
230 {
231 int somaxconn_local = atomic_load_int(&somaxconn);
232 int sominconn_local = atomic_load_int(&sominconn);
233 int error;
234
235 switch (so->so_type) {
236 case SOCK_STREAM:
237 case SOCK_SEQPACKET:
238 break;
239 default:
240 return (EOPNOTSUPP);
241 }
242
243 soassertlocked(so);
244
245 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
246 return (EINVAL);
247 #ifdef SOCKET_SPLICE
248 if (isspliced(so) || issplicedback(so))
249 return (EOPNOTSUPP);
250 #endif /* SOCKET_SPLICE */
251 error = pru_listen(so);
252 if (error)
253 return (error);
254 if (TAILQ_FIRST(&so->so_q) == NULL)
255 so->so_options |= SO_ACCEPTCONN;
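/* Clamp the requested backlog to the [sominconn, somaxconn] range. */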
256 if (backlog < 0 || backlog > somaxconn_local)
257 backlog = somaxconn_local;
258 if (backlog < sominconn_local)
259 backlog = sominconn_local;
260 so->so_qlimit = backlog;
261 return (0);
262 }
263
264 void
265 sorele(struct socket *so)
266 {
267 if (refcnt_rele(&so->so_refcnt) == 0)
268 return;
269
270 sigio_free(&so->so_sigio);
271 klist_free(&so->so_rcv.sb_klist);
272 klist_free(&so->so_snd.sb_klist);
273
274 mtx_enter(&so->so_snd.sb_mtx);
275 sbrelease(so, &so->so_snd);
276 mtx_leave(&so->so_snd.sb_mtx);
277
278 if (so->so_proto->pr_flags & PR_RIGHTS &&
279 so->so_proto->pr_domain->dom_dispose)
280 (*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
281 m_purge(so->so_rcv.sb_mb);
282
283 #ifdef SOCKET_SPLICE
284 if (so->so_sp)
285 pool_put(&sosplice_pool, so->so_sp);
286 #endif
287 pool_put(&socket_pool, so);
288 }
289
290 #define SOSP_FREEING_READ 1
291 #define SOSP_FREEING_WRITE 2
292 void
293 sofree(struct socket *so, int keep_lock)
294 {
295 int persocket = solock_persocket(so);
296
297 soassertlocked(so);
298
299 if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
300 if (!keep_lock)
301 sounlock(so);
302 return;
303 }
304 if (so->so_head) {
305 struct socket *head = so->so_head;
306
307 /*
308 * We must not decommission a socket that's on the accept(2)
309 * queue. If we do, then accept(2) may hang after select(2)
310 * indicated that the listening socket was ready.
311 */
312 if (so->so_onq == &head->so_q) {
313 if (!keep_lock)
314 sounlock(so);
315 return;
316 }
317
318 if (persocket) {
319 soref(head);
320 sounlock(so);
321 solock(head);
322 solock(so);
323
324 if (so->so_onq != &head->so_q0) {
325 sounlock(so);
326 sounlock(head);
327 sorele(head);
328 return;
329 }
330 }
331
332 soqremque(so, 0);
333
334 if (persocket) {
335 sounlock(head);
336 sorele(head);
337 }
338 }
339
340 if (!keep_lock)
341 sounlock(so);
342 sorele(so);
343 }
344
345 static inline uint64_t
346 solinger_nsec(struct socket *so)
347 {
348 if (so->so_linger == 0)
349 return INFSLP;
350
351 return SEC_TO_NSEC(so->so_linger);
352 }
353
354 /*
355 * Close a socket on last file table reference removal.
356 * Initiate disconnect if connected.
357 * Free socket when disconnect complete.
358 */
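/*
 * Note that with SO_LINGER set and so_linger == 0, solinger_nsec()
 * above returns INFSLP, so the lingering close below waits without a
 * timeout until the disconnect completes or a signal interrupts it.
 */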
359 int
360 soclose(struct socket *so, int flags)
361 {
362 struct socket *so2;
363 int error = 0;
364
365 solock(so);
366 /* Revoke async IO early. There is a final revocation in sofree(). */
367 sigio_free(&so->so_sigio);
368 if (so->so_state & SS_ISCONNECTED) {
369 if (so->so_pcb == NULL)
370 goto discard;
371 if ((so->so_state & SS_ISDISCONNECTING) == 0) {
372 error = sodisconnect(so);
373 if (error)
374 goto drop;
375 }
376 if (so->so_options & SO_LINGER) {
377 if ((so->so_state & SS_ISDISCONNECTING) &&
378 (flags & MSG_DONTWAIT))
379 goto drop;
380 while (so->so_state & SS_ISCONNECTED) {
381 error = sosleep_nsec(so, &so->so_timeo,
382 PSOCK | PCATCH, "netcls",
383 solinger_nsec(so));
384 if (error)
385 break;
386 }
387 }
388 }
389 drop:
390 if (so->so_pcb) {
391 int error2;
392 error2 = pru_detach(so);
393 if (error == 0)
394 error = error2;
395 }
396 if (so->so_options & SO_ACCEPTCONN) {
397 int persocket = solock_persocket(so);
398
399 while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
400 soref(so2);
401 solock(so2);
402 (void) soqremque(so2, 0);
403 sounlock(so);
404 soabort(so2);
405 sounlock(so2);
406 sorele(so2);
407 solock(so);
408 }
409 while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
410 soref(so2);
411 solock_nonet(so2);
412 (void) soqremque(so2, 1);
413 if (persocket)
414 sounlock(so);
415 soabort(so2);
416 sounlock_nonet(so2);
417 sorele(so2);
418 if (persocket)
419 solock(so);
420 }
421 }
422 discard:
423 #ifdef SOCKET_SPLICE
424 if (so->so_sp) {
425 struct socket *soback;
426
427 sounlock(so);
428 mtx_enter(&so->so_snd.sb_mtx);
429 /*
430 * Concurrent sounsplice() locks `sb_mtx' mutexes on
431 * both `so_snd' and `so_rcv' before unsplicing the sockets.
432 */
433 if ((soback = so->so_sp->ssp_soback) == NULL) {
434 mtx_leave(&so->so_snd.sb_mtx);
435 goto notsplicedback;
436 }
437 soref(soback);
438 mtx_leave(&so->so_snd.sb_mtx);
439
440 /*
441 * `so' can only be unspliced, and never spliced again.
442 * Thus if the issplicedback(so) check is positive, the
443 * socket is still spliced and `ssp_soback' points to the
444 * same socket as `soback'.
445 */
446 sblock(&soback->so_rcv, SBL_WAIT | SBL_NOINTR);
447 if (issplicedback(so)) {
448 int freeing = SOSP_FREEING_WRITE;
449
450 if (so->so_sp->ssp_soback == so)
451 freeing |= SOSP_FREEING_READ;
452 sounsplice(so->so_sp->ssp_soback, so, freeing);
453 }
454 sbunlock(&soback->so_rcv);
455 sorele(soback);
456
457 notsplicedback:
458 sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
459 if (isspliced(so)) {
460 struct socket *sosp;
461 int freeing = SOSP_FREEING_READ;
462
463 if (so == so->so_sp->ssp_socket)
464 freeing |= SOSP_FREEING_WRITE;
465 sosp = soref(so->so_sp->ssp_socket);
466 sounsplice(so, so->so_sp->ssp_socket, freeing);
467 sorele(sosp);
468 }
469 sbunlock(&so->so_rcv);
470
471 timeout_del_barrier(&so->so_sp->ssp_idleto);
472 task_del(sosplice_taskq, &so->so_sp->ssp_task);
473 taskq_barrier(sosplice_taskq);
474
475 solock(so);
476 }
477 #endif /* SOCKET_SPLICE */
478
479 if (so->so_state & SS_NOFDREF)
480 panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
481 so->so_state |= SS_NOFDREF;
482
483 /* sofree() calls sounlock(). */
484 sofree(so, 0);
485 return (error);
486 }
487
488 void
489 soabort(struct socket *so)
490 {
491 soassertlocked(so);
492 pru_abort(so);
493 }
494
495 int
496 soaccept(struct socket *so, struct mbuf *nam)
497 {
498 int error = 0;
499
500 soassertlocked(so);
501
502 if ((so->so_state & SS_NOFDREF) == 0)
503 panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
504 so->so_state &= ~SS_NOFDREF;
505 if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
506 (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
507 error = pru_accept(so, nam);
508 else
509 error = ECONNABORTED;
510 return (error);
511 }
512
513 int
514 soconnect(struct socket *so, struct mbuf *nam)
515 {
516 int error;
517
518 soassertlocked(so);
519
520 if (so->so_options & SO_ACCEPTCONN)
521 return (EOPNOTSUPP);
522 /*
523 * If protocol is connection-based, can only connect once.
524 * Otherwise, if connected, try to disconnect first.
525 * This allows user to disconnect by connecting to, e.g.,
526 * a null address.
527 */
528 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
529 ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
530 (error = sodisconnect(so))))
531 error = EISCONN;
532 else
533 error = pru_connect(so, nam);
534 return (error);
535 }
536
537 int
538 soconnect2(struct socket *so1, struct socket *so2)
539 {
540 int persocket, error;
541
542 if ((persocket = solock_persocket(so1)))
543 solock_pair(so1, so2);
544 else
545 solock(so1);
546
547 error = pru_connect2(so1, so2);
548
549 if (persocket)
550 sounlock(so2);
551 sounlock(so1);
552 return (error);
553 }
554
555 int
556 sodisconnect(struct socket *so)
557 {
558 int error;
559
560 soassertlocked(so);
561
562 if ((so->so_state & SS_ISCONNECTED) == 0)
563 return (ENOTCONN);
564 if (so->so_state & SS_ISDISCONNECTING)
565 return (EALREADY);
566 error = pru_disconnect(so);
567 return (error);
568 }
569
570 int m_getuio(struct mbuf **, int, long, struct uio *);
571
572 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
573 /*
574 * Send on a socket.
575 * If send must go all at once and message is larger than
576 * send buffering, then hard error.
577 * Lock against other senders.
578 * If must go all at once and not enough room now, then
579 * inform user that this would block and do nothing.
580 * Otherwise, if nonblocking, send as much as possible.
581 * The data to be sent is described by "uio" if nonzero,
582 * otherwise by the mbuf chain "top" (which must be null
583 * if uio is not). Data provided in mbuf chain must be small
584 * enough to send all at once.
585 *
586 * Returns nonzero on error, timeout or signal; callers
587 * must check for short counts if EINTR/ERESTART are returned.
588 * Data and control buffers are freed on return.
589 */
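/*
 * Locking sketch: sblock() serializes senders, sb_mtx protects the
 * buffer fields, and the socket lock is taken shared only around the
 * pru_send*() calls; the uiomove() copyin in m_getuio() runs with
 * both sb_mtx and the socket lock released.
 */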
590 int
591 sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
592 struct mbuf *control, int flags)
593 {
594 long space, clen = 0;
595 size_t resid;
596 int error;
597 int atomic = sosendallatonce(so) || top;
598
599 if (uio)
600 resid = uio->uio_resid;
601 else
602 resid = top->m_pkthdr.len;
603 /* MSG_EOR on a SOCK_STREAM socket is invalid. */
604 if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
605 m_freem(top);
606 m_freem(control);
607 return (EINVAL);
608 }
609 if (uio && uio->uio_procp)
610 uio->uio_procp->p_ru.ru_msgsnd++;
611 if (control) {
612 /*
613 * In theory clen should be unsigned (since control->m_len is).
614 * However, space must be signed, as it might be less than 0
615 * if we over-committed, and we must use a signed comparison
616 * of space and clen.
617 */
618 clen = control->m_len;
619 /* reserve extra space for AF_UNIX's internalize */
620 if (so->so_proto->pr_domain->dom_family == AF_UNIX &&
621 clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) &&
622 mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
623 clen = CMSG_SPACE(
624 (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) *
625 (sizeof(struct fdpass) / sizeof(int)));
626 }
627
628 #define snderr(errno) { error = errno; goto release; }
629
630 restart:
631 if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
632 goto out;
633 mtx_enter(&so->so_snd.sb_mtx);
634 so->so_snd.sb_state |= SS_ISSENDING;
635 do {
636 if (so->so_snd.sb_state & SS_CANTSENDMORE)
637 snderr(EPIPE);
638 if ((error = READ_ONCE(so->so_error))) {
639 so->so_error = 0;
640 snderr(error);
641 }
642 if ((so->so_state & SS_ISCONNECTED) == 0) {
643 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
644 if (!(resid == 0 && clen != 0))
645 snderr(ENOTCONN);
646 } else if (addr == NULL)
647 snderr(EDESTADDRREQ);
648 }
649 space = sbspace_locked(so, &so->so_snd);
650 if (flags & MSG_OOB)
651 space += 1024;
652 if (so->so_proto->pr_domain->dom_family == AF_UNIX) {
653 if (atomic && resid > so->so_snd.sb_hiwat)
654 snderr(EMSGSIZE);
655 } else {
656 if (clen > so->so_snd.sb_hiwat ||
657 (atomic && resid > so->so_snd.sb_hiwat - clen))
658 snderr(EMSGSIZE);
659 }
660 if (space < clen ||
661 (space - clen < resid &&
662 (atomic || space < so->so_snd.sb_lowat))) {
663 if (flags & MSG_DONTWAIT)
664 snderr(EWOULDBLOCK);
665 sbunlock(&so->so_snd);
666 error = sbwait(&so->so_snd);
667 so->so_snd.sb_state &= ~SS_ISSENDING;
668 mtx_leave(&so->so_snd.sb_mtx);
669 if (error)
670 goto out;
671 goto restart;
672 }
673 space -= clen;
674 do {
675 if (uio == NULL) {
676 /*
677 * Data is prepackaged in "top".
678 */
679 resid = 0;
680 if (flags & MSG_EOR)
681 top->m_flags |= M_EOR;
682 } else {
683 mtx_leave(&so->so_snd.sb_mtx);
684 error = m_getuio(&top, atomic, space, uio);
685 mtx_enter(&so->so_snd.sb_mtx);
686 if (error)
687 goto release;
688 space -= top->m_pkthdr.len;
689 resid = uio->uio_resid;
690 if (flags & MSG_EOR)
691 top->m_flags |= M_EOR;
692 }
693 if (resid == 0)
694 so->so_snd.sb_state &= ~SS_ISSENDING;
695 if (top && so->so_options & SO_ZEROIZE)
696 top->m_flags |= M_ZEROIZE;
697 mtx_leave(&so->so_snd.sb_mtx);
698 solock_shared(so);
699 if (flags & MSG_OOB)
700 error = pru_sendoob(so, top, addr, control);
701 else
702 error = pru_send(so, top, addr, control);
703 sounlock_shared(so);
704 mtx_enter(&so->so_snd.sb_mtx);
705 clen = 0;
706 control = NULL;
707 top = NULL;
708 if (error)
709 goto release;
710 } while (resid && space > 0);
711 } while (resid);
712
713 release:
714 so->so_snd.sb_state &= ~SS_ISSENDING;
715 mtx_leave(&so->so_snd.sb_mtx);
716 sbunlock(&so->so_snd);
717 out:
718 m_freem(top);
719 m_freem(control);
720 return (error);
721 }
722
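/*
 * Fill an mbuf chain with at most 'space' bytes copied in from the uio.
 * Called from sosend() with sb_mtx released, so M_WAIT allocations and
 * the copyin may sleep.
 */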
723 int
724 m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio)
725 {
726 struct mbuf *m, *top = NULL;
727 struct mbuf **nextp = &top;
728 u_long len, mlen;
729 size_t resid = uio->uio_resid;
730 int error;
731
732 do {
733 if (top == NULL) {
734 MGETHDR(m, M_WAIT, MT_DATA);
735 mlen = MHLEN;
736 } else {
737 MGET(m, M_WAIT, MT_DATA);
738 mlen = MLEN;
739 }
740 /* chain mbufs together */
741 *nextp = m;
742 nextp = &m->m_next;
743
744 resid = ulmin(resid, space);
745 if (resid >= MINCLSIZE) {
746 MCLGETL(m, M_NOWAIT, ulmin(resid, MAXMCLBYTES));
747 if ((m->m_flags & M_EXT) == 0)
748 MCLGETL(m, M_NOWAIT, MCLBYTES);
749 if ((m->m_flags & M_EXT) == 0)
750 goto nopages;
751 mlen = m->m_ext.ext_size;
752 len = ulmin(mlen, resid);
753 /*
754 * For datagram protocols, leave room
755 * for protocol headers in first mbuf.
756 */
757 if (atomic && m == top && len < mlen - max_hdr)
758 m->m_data += max_hdr;
759 } else {
760 nopages:
761 len = ulmin(mlen, resid);
762 /*
763 * For datagram protocols, leave room
764 * for protocol headers in first mbuf.
765 */
766 if (atomic && m == top && len < mlen - max_hdr)
767 m_align(m, len);
768 }
769
770 error = uiomove(mtod(m, caddr_t), len, uio);
771 if (error) {
772 m_freem(top);
773 return (error);
774 }
775
776 /* adjust counters */
777 resid = uio->uio_resid;
778 space -= len;
779 m->m_len = len;
780 top->m_pkthdr.len += len;
781
782 /* Is there more space and more data? */
783 } while (space > 0 && resid > 0);
784
785 *mp = top;
786 return 0;
787 }
788
789 /*
790 * Following replacement or removal of the first mbuf on the first
791 * mbuf chain of a socket buffer, push necessary state changes back
792 * into the socket buffer so that other consumers see the values
793 * consistently. 'nextrecord' is the callers locally stored value of
794 * the original value of sb->sb_mb->m_nextpkt which must be restored
795 * when the lead mbuf changes. NOTE: 'nextrecord' may be NULL.
796 */
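/*
 * Typical use: a caller saves nextrecord = sb->sb_mb->m_nextpkt, frees
 * or replaces the leading mbuf, then calls sbsync() to reattach the
 * saved record chain and fix up sb_lastrecord/sb_mbtail.
 */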
797 void
798 sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
799 {
800
801 /*
802 * First, update for the new value of nextrecord. If necessary,
803 * make it the first record.
804 */
805 if (sb->sb_mb != NULL)
806 sb->sb_mb->m_nextpkt = nextrecord;
807 else
808 sb->sb_mb = nextrecord;
809
810 /*
811 * Now update any dependent socket buffer fields to reflect
812 * the new state. This is an inline of SB_EMPTY_FIXUP, with
813 * the addition of a second clause that takes care of the
814 * case where sb_mb has been updated, but remains the last
815 * record.
816 */
817 if (sb->sb_mb == NULL) {
818 sb->sb_mbtail = NULL;
819 sb->sb_lastrecord = NULL;
820 } else if (sb->sb_mb->m_nextpkt == NULL)
821 sb->sb_lastrecord = sb->sb_mb;
822 }
823
824 /*
825 * Implement receive operations on a socket.
826 * We depend on the way that records are added to the sockbuf
827 * by sbappend*. In particular, each record (mbufs linked through m_next)
828 * must begin with an address if the protocol so specifies,
829 * followed by an optional mbuf or mbufs containing ancillary data,
830 * and then zero or more mbufs of data.
831 * In order to avoid blocking the network for the entire time here, we release
832 * the solock() while doing the actual copy to user space.
833 * Although the sockbuf is locked, new data may still be appended,
834 * and thus we must maintain consistency of the sockbuf during that time.
835 *
836 * The caller may receive the data as a single mbuf chain by supplying
837 * an mbuf **mp0 for use in returning the chain. The uio is then used
838 * only for the count in uio_resid.
839 */
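/*
 * A record from a PR_ADDR protocol thus looks like
 *	MT_SONAME -> [MT_CONTROL ...] -> MT_DATA -> ...
 * linked through m_next, with records chained through m_nextpkt.
 */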
840 int
841 soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
842 struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
843 socklen_t controllen)
844 {
845 struct mbuf *m, **mp;
846 struct mbuf *cm;
847 u_long len, offset, moff;
848 int flags, error, error2, type, uio_error = 0;
849 const struct protosw *pr = so->so_proto;
850 struct mbuf *nextrecord;
851 size_t resid, orig_resid = uio->uio_resid;
852
853 mp = mp0;
854 if (paddr)
855 *paddr = NULL;
856 if (controlp)
857 *controlp = NULL;
858 if (flagsp)
859 flags = *flagsp &~ MSG_EOR;
860 else
861 flags = 0;
862 if (flags & MSG_OOB) {
863 m = m_get(M_WAIT, MT_DATA);
864 solock_shared(so);
865 error = pru_rcvoob(so, m, flags & MSG_PEEK);
866 sounlock_shared(so);
867 if (error)
868 goto bad;
869 do {
870 error = uiomove(mtod(m, caddr_t),
871 ulmin(uio->uio_resid, m->m_len), uio);
872 m = m_free(m);
873 } while (uio->uio_resid && error == 0 && m);
874 bad:
875 m_freem(m);
876 return (error);
877 }
878 if (mp)
879 *mp = NULL;
880
881 restart:
882 if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
883 return (error);
884 mtx_enter(&so->so_rcv.sb_mtx);
885
886 m = so->so_rcv.sb_mb;
887 #ifdef SOCKET_SPLICE
888 if (isspliced(so))
889 m = NULL;
890 #endif /* SOCKET_SPLICE */
891 /*
892 * If we have less data than requested, block awaiting more
893 * (subject to any timeout) if:
894 * 1. the current count is less than the low water mark,
895 * 2. MSG_WAITALL is set, and it is possible to do the entire
896 * receive operation at once if we block (resid <= hiwat), or
897 * 3. MSG_DONTWAIT is not set.
898 * If MSG_WAITALL is set but resid is larger than the receive buffer,
899 * we have to do the receive in sections, and thus risk returning
900 * a short count if a timeout or signal occurs after we start.
901 */
902 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
903 so->so_rcv.sb_cc < uio->uio_resid) &&
904 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
905 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
906 m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
907 #ifdef DIAGNOSTIC
908 if (m == NULL && so->so_rcv.sb_cc)
909 #ifdef SOCKET_SPLICE
910 if (!isspliced(so))
911 #endif /* SOCKET_SPLICE */
912 panic("receive 1: so %p, so_type %d, sb_cc %lu",
913 so, so->so_type, so->so_rcv.sb_cc);
914 #endif
915 if ((error2 = READ_ONCE(so->so_error))) {
916 if (m)
917 goto dontblock;
918 error = error2;
919 if ((flags & MSG_PEEK) == 0)
920 so->so_error = 0;
921 goto release;
922 }
923 if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
924 if (m)
925 goto dontblock;
926 else if (so->so_rcv.sb_cc == 0)
927 goto release;
928 }
929 for (; m; m = m->m_next)
930 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
931 m = so->so_rcv.sb_mb;
932 goto dontblock;
933 }
934 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
935 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
936 error = ENOTCONN;
937 goto release;
938 }
939 if (uio->uio_resid == 0 && controlp == NULL)
940 goto release;
941 if (flags & MSG_DONTWAIT) {
942 error = EWOULDBLOCK;
943 goto release;
944 }
945 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
946 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
947
948 sbunlock(&so->so_rcv);
949 error = sbwait(&so->so_rcv);
950 mtx_leave(&so->so_rcv.sb_mtx);
951 if (error)
952 return (error);
953 goto restart;
954 }
955 dontblock:
956 /*
957 * On entry here, m points to the first record of the socket buffer.
958 * From this point onward, we maintain 'nextrecord' as a cache of the
959 * pointer to the next record in the socket buffer. We must keep the
960 * various socket buffer pointers and local stack versions of the
961 * pointers in sync, pushing out modifications before operations that
962 * may sleep, and re-reading them afterwards.
963 *
964 * Otherwise, we will race with the network stack appending new data
965 * or records onto the socket buffer by using inconsistent/stale
966 * versions of the field, possibly resulting in socket buffer
967 * corruption.
968 */
969 if (uio->uio_procp)
970 uio->uio_procp->p_ru.ru_msgrcv++;
971 KASSERT(m == so->so_rcv.sb_mb);
972 SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
973 SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
974 nextrecord = m->m_nextpkt;
975 if (pr->pr_flags & PR_ADDR) {
976 #ifdef DIAGNOSTIC
977 if (m->m_type != MT_SONAME)
978 panic("receive 1a: so %p, so_type %d, m %p, m_type %d",
979 so, so->so_type, m, m->m_type);
980 #endif
981 orig_resid = 0;
982 if (flags & MSG_PEEK) {
983 if (paddr)
984 *paddr = m_copym(m, 0, m->m_len, M_NOWAIT);
985 m = m->m_next;
986 } else {
987 sbfree(&so->so_rcv, m);
988 if (paddr) {
989 *paddr = m;
990 so->so_rcv.sb_mb = m->m_next;
991 m->m_next = NULL;
992 m = so->so_rcv.sb_mb;
993 } else {
994 so->so_rcv.sb_mb = m_free(m);
995 m = so->so_rcv.sb_mb;
996 }
997 sbsync(&so->so_rcv, nextrecord);
998 }
999 }
1000 while (m && m->m_type == MT_CONTROL && error == 0) {
1001 int skip = 0;
1002 if (flags & MSG_PEEK) {
1003 if (mtod(m, struct cmsghdr *)->cmsg_type ==
1004 SCM_RIGHTS) {
1005 /* don't leak internalized SCM_RIGHTS msgs */
1006 skip = 1;
1007 } else if (controlp)
1008 *controlp = m_copym(m, 0, m->m_len, M_NOWAIT);
1009 m = m->m_next;
1010 } else {
1011 sbfree(&so->so_rcv, m);
1012 so->so_rcv.sb_mb = m->m_next;
1013 m->m_nextpkt = m->m_next = NULL;
1014 cm = m;
1015 m = so->so_rcv.sb_mb;
1016 sbsync(&so->so_rcv, nextrecord);
1017 if (controlp) {
1018 if (pr->pr_domain->dom_externalize) {
1019 mtx_leave(&so->so_rcv.sb_mtx);
1020 error =
1021 (*pr->pr_domain->dom_externalize)
1022 (cm, controllen, flags);
1023 mtx_enter(&so->so_rcv.sb_mtx);
1024 }
1025 *controlp = cm;
1026 } else {
1027 /*
1028 * Dispose of any SCM_RIGHTS message that went
1029 * through the read path rather than recv.
1030 */
1031 if (pr->pr_domain->dom_dispose) {
1032 mtx_leave(&so->so_rcv.sb_mtx);
1033 pr->pr_domain->dom_dispose(cm);
1034 mtx_enter(&so->so_rcv.sb_mtx);
1035 }
1036 m_free(cm);
1037 }
1038 }
1039 if (m != NULL)
1040 nextrecord = so->so_rcv.sb_mb->m_nextpkt;
1041 else
1042 nextrecord = so->so_rcv.sb_mb;
1043 if (controlp && !skip)
1044 controlp = &(*controlp)->m_next;
1045 orig_resid = 0;
1046 }
1047
1048 /* If m is non-NULL, we have some data to read. */
1049 if (m) {
1050 type = m->m_type;
1051 if (type == MT_OOBDATA)
1052 flags |= MSG_OOB;
1053 if (m->m_flags & M_BCAST)
1054 flags |= MSG_BCAST;
1055 if (m->m_flags & M_MCAST)
1056 flags |= MSG_MCAST;
1057 }
1058 SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
1059 SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");
1060
1061 moff = 0;
1062 offset = 0;
1063 while (m && uio->uio_resid > 0 && error == 0) {
1064 if (m->m_type == MT_OOBDATA) {
1065 if (type != MT_OOBDATA)
1066 break;
1067 } else if (type == MT_OOBDATA) {
1068 break;
1069 } else if (m->m_type == MT_CONTROL) {
1070 /*
1071 * If there is more than one control message in the
1072 * stream, we do a short read. The next one can be
1073 * received or disposed of by another system call.
1074 */
1075 break;
1076 #ifdef DIAGNOSTIC
1077 } else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) {
1078 panic("receive 3: so %p, so_type %d, m %p, m_type %d",
1079 so, so->so_type, m, m->m_type);
1080 #endif
1081 }
1082 so->so_rcv.sb_state &= ~SS_RCVATMARK;
1083 len = uio->uio_resid;
1084 if (so->so_oobmark && len > so->so_oobmark - offset)
1085 len = so->so_oobmark - offset;
1086 if (len > m->m_len - moff)
1087 len = m->m_len - moff;
1088 /*
1089 * If mp is set, just pass back the mbufs.
1090 * Otherwise copy them out via the uio, then free.
1091 * Sockbuf must be consistent here (sb_mb points to the current
1092 * mbuf, m_nextpkt to the next record) when we drop priority;
1093 * we must note any additions to the sockbuf when we
1094 * block interrupts again.
1095 */
1096 if (mp == NULL && uio_error == 0) {
1097 SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
1098 SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
1099 resid = uio->uio_resid;
1100 mtx_leave(&so->so_rcv.sb_mtx);
1101 uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
1102 mtx_enter(&so->so_rcv.sb_mtx);
1103 if (uio_error)
1104 uio->uio_resid = resid - len;
1105 } else
1106 uio->uio_resid -= len;
1107 if (len == m->m_len - moff) {
1108 if (m->m_flags & M_EOR)
1109 flags |= MSG_EOR;
1110 if (flags & MSG_PEEK) {
1111 m = m->m_next;
1112 moff = 0;
1113 orig_resid = 0;
1114 } else {
1115 nextrecord = m->m_nextpkt;
1116 sbfree(&so->so_rcv, m);
1117 if (mp) {
1118 *mp = m;
1119 mp = &m->m_next;
1120 so->so_rcv.sb_mb = m = m->m_next;
1121 *mp = NULL;
1122 } else {
1123 so->so_rcv.sb_mb = m_free(m);
1124 m = so->so_rcv.sb_mb;
1125 }
1126 /*
1127 * If m != NULL, we also know that
1128 * so->so_rcv.sb_mb != NULL.
1129 */
1130 KASSERT(so->so_rcv.sb_mb == m);
1131 if (m) {
1132 m->m_nextpkt = nextrecord;
1133 if (nextrecord == NULL)
1134 so->so_rcv.sb_lastrecord = m;
1135 } else {
1136 so->so_rcv.sb_mb = nextrecord;
1137 SB_EMPTY_FIXUP(&so->so_rcv);
1138 }
1139 SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
1140 SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
1141 }
1142 } else {
1143 if (flags & MSG_PEEK) {
1144 moff += len;
1145 orig_resid = 0;
1146 } else {
1147 if (mp)
1148 *mp = m_copym(m, 0, len, M_WAIT);
1149 m->m_data += len;
1150 m->m_len -= len;
1151 so->so_rcv.sb_cc -= len;
1152 so->so_rcv.sb_datacc -= len;
1153 }
1154 }
1155 if (so->so_oobmark) {
1156 if ((flags & MSG_PEEK) == 0) {
1157 so->so_oobmark -= len;
1158 if (so->so_oobmark == 0) {
1159 so->so_rcv.sb_state |= SS_RCVATMARK;
1160 break;
1161 }
1162 } else {
1163 offset += len;
1164 if (offset == so->so_oobmark)
1165 break;
1166 }
1167 }
1168 if (flags & MSG_EOR)
1169 break;
1170 /*
1171 * If the MSG_WAITALL flag is set (for non-atomic socket),
1172 * we must not quit until "uio->uio_resid == 0" or an error
1173 * termination. If a signal/timeout occurs, return
1174 * with a short count but without error.
1175 * Keep sockbuf locked against other readers.
1176 */
1177 while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
1178 !sosendallatonce(so) && !nextrecord) {
1179 if (so->so_rcv.sb_state & SS_CANTRCVMORE ||
1180 so->so_error)
1181 break;
1182 SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
1183 SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
1184 if (sbwait(&so->so_rcv)) {
1185 mtx_leave(&so->so_rcv.sb_mtx);
1186 sbunlock(&so->so_rcv);
1187 return (0);
1188 }
1189 if ((m = so->so_rcv.sb_mb) != NULL)
1190 nextrecord = m->m_nextpkt;
1191 }
1192 }
1193
1194 if (m && pr->pr_flags & PR_ATOMIC) {
1195 flags |= MSG_TRUNC;
1196 if ((flags & MSG_PEEK) == 0)
1197 (void) sbdroprecord(so, &so->so_rcv);
1198 }
1199 if ((flags & MSG_PEEK) == 0) {
1200 if (m == NULL) {
1201 /*
1202 * First part is an inline SB_EMPTY_FIXUP(). Second
1203 * part makes sure sb_lastrecord is up-to-date if
1204 * there is still data in the socket buffer.
1205 */
1206 so->so_rcv.sb_mb = nextrecord;
1207 if (so->so_rcv.sb_mb == NULL) {
1208 so->so_rcv.sb_mbtail = NULL;
1209 so->so_rcv.sb_lastrecord = NULL;
1210 } else if (nextrecord->m_nextpkt == NULL)
1211 so->so_rcv.sb_lastrecord = nextrecord;
1212 }
1213 SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
1214 SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
1215 if (pr->pr_flags & PR_WANTRCVD) {
1216 mtx_leave(&so->so_rcv.sb_mtx);
1217 solock_shared(so);
1218 pru_rcvd(so);
1219 sounlock_shared(so);
1220 mtx_enter(&so->so_rcv.sb_mtx);
1221 }
1222 }
1223 if (orig_resid == uio->uio_resid && orig_resid &&
1224 (flags & MSG_EOR) == 0 &&
1225 (so->so_rcv.sb_state & SS_CANTRCVMORE) == 0) {
1226 mtx_leave(&so->so_rcv.sb_mtx);
1227 sbunlock(&so->so_rcv);
1228 goto restart;
1229 }
1230
1231 if (uio_error)
1232 error = uio_error;
1233
1234 if (flagsp)
1235 *flagsp |= flags;
1236 release:
1237 mtx_leave(&so->so_rcv.sb_mtx);
1238 sbunlock(&so->so_rcv);
1239 return (error);
1240 }
1241
1242 int
1243 soshutdown(struct socket *so, int how)
1244 {
1245 int error = 0;
1246
1247 switch (how) {
1248 case SHUT_RD:
1249 sorflush(so);
1250 break;
1251 case SHUT_RDWR:
1252 sorflush(so);
1253 /* FALLTHROUGH */
1254 case SHUT_WR:
1255 solock(so);
1256 error = pru_shutdown(so);
1257 sounlock(so);
1258 break;
1259 default:
1260 error = EINVAL;
1261 break;
1262 }
1263
1264 return (error);
1265 }
1266
1267 void
1268 sorflush(struct socket *so)
1269 {
1270 struct sockbuf *sb = &so->so_rcv;
1271 struct mbuf *m;
1272 const struct protosw *pr = so->so_proto;
1273 int error;
1274
1275 error = sblock(sb, SBL_WAIT | SBL_NOINTR);
1276 /* with SBL_WAIT and SBL_NOINTR sblock() must not fail */
1277 KASSERT(error == 0);
1278
1279 solock_shared(so);
1280 socantrcvmore(so);
1281 sounlock_shared(so);
1282 mtx_enter(&sb->sb_mtx);
1283 m = sb->sb_mb;
1284 memset(&sb->sb_startzero, 0,
1285 (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
1286 sb->sb_timeo_nsecs = INFSLP;
1287 mtx_leave(&sb->sb_mtx);
1288 sbunlock(sb);
1289
1290 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
1291 (*pr->pr_domain->dom_dispose)(m);
1292 m_purge(m);
1293 }
1294
1295 #ifdef SOCKET_SPLICE
1296
1297 #define so_splicelen so_sp->ssp_len
1298 #define so_splicemax so_sp->ssp_max
1299 #define so_idletv so_sp->ssp_idletv
1300 #define so_idleto so_sp->ssp_idleto
1301 #define so_splicetask so_sp->ssp_task
1302
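/*
 * Lock both sockets in a consistent order, by ascending address, so
 * that two concurrent splice operations cannot deadlock.
 */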
1303 void
1304 sosplice_solock_pair(struct socket *so1, struct socket *so2)
1305 {
1306 NET_LOCK_SHARED();
1307
1308 if (so1 == so2)
1309 rw_enter_write(&so1->so_lock);
1310 else if (so1 < so2) {
1311 rw_enter_write(&so1->so_lock);
1312 rw_enter_write(&so2->so_lock);
1313 } else {
1314 rw_enter_write(&so2->so_lock);
1315 rw_enter_write(&so1->so_lock);
1316 }
1317 }
1318
1319 void
1320 sosplice_sounlock_pair(struct socket *so1, struct socket *so2)
1321 {
1322 if (so1 == so2)
1323 rw_exit_write(&so1->so_lock);
1324 else if (so1 < so2) {
1325 rw_exit_write(&so2->so_lock);
1326 rw_exit_write(&so1->so_lock);
1327 } else {
1328 rw_exit_write(&so1->so_lock);
1329 rw_exit_write(&so2->so_lock);
1330 }
1331
1332 NET_UNLOCK_SHARED();
1333 }
1334
1335 int
1336 sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
1337 {
1338 struct file *fp;
1339 struct socket *sosp;
1340 struct taskq *tq;
1341 int error = 0;
1342
1343 if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
1344 return (EPROTONOSUPPORT);
1345 if (max && max < 0)
1346 return (EINVAL);
1347 if (tv && (tv->tv_sec < 0 || !timerisvalid(tv)))
1348 return (EINVAL);
1349
1350 /* If no fd is given, unsplice by removing existing link. */
1351 if (fd < 0) {
1352 if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
1353 return (error);
1354 if (so->so_sp && so->so_sp->ssp_socket) {
1355 sosp = soref(so->so_sp->ssp_socket);
1356 sounsplice(so, so->so_sp->ssp_socket, 0);
1357 sorele(sosp);
1358 } else
1359 error = EPROTO;
1360 sbunlock(&so->so_rcv);
1361 return (error);
1362 }
1363
1364 if (sosplice_taskq == NULL) {
1365 rw_enter_write(&sosplice_lock);
1366 if (sosplice_taskq == NULL) {
1367 tq = taskq_create("sosplice", 1, IPL_SOFTNET,
1368 TASKQ_MPSAFE);
1369 if (tq == NULL) {
1370 rw_exit_write(&sosplice_lock);
1371 return (ENOMEM);
1372 }
1373 /* Ensure the taskq is fully visible to other CPUs. */
1374 membar_producer();
1375 sosplice_taskq = tq;
1376 }
1377 rw_exit_write(&sosplice_lock);
1378 } else {
1379 /* Ensure the taskq is fully visible on this CPU. */
1380 membar_consumer();
1381 }
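/*
 * The above is double-checked locking: the writer publishes the
 * taskq with membar_producer() under sosplice_lock and readers
 * pair with membar_consumer() before dereferencing it.
 */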
1382
1383 /* Find sosp, the drain socket into which data will be spliced. */
1384 if ((error = getsock(curproc, fd, &fp)) != 0)
1385 return (error);
1386 sosp = fp->f_data;
1387
1388 if (sosp->so_proto->pr_usrreqs->pru_send !=
1389 so->so_proto->pr_usrreqs->pru_send) {
1390 error = EPROTONOSUPPORT;
1391 goto frele;
1392 }
1393
1394 if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
1395 goto frele;
1396 if ((error = sblock(&sosp->so_snd, SBL_WAIT)) != 0) {
1397 sbunlock(&so->so_rcv);
1398 goto frele;
1399 }
1400 sosplice_solock_pair(so, sosp);
1401
1402 if ((so->so_options & SO_ACCEPTCONN) ||
1403 (sosp->so_options & SO_ACCEPTCONN)) {
1404 error = EOPNOTSUPP;
1405 goto release;
1406 }
1407 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
1408 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
1409 error = ENOTCONN;
1410 goto release;
1411 }
1412 if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
1413 error = ENOTCONN;
1414 goto release;
1415 }
1416 if (so->so_sp == NULL) {
1417 struct sosplice *so_sp;
1418
1419 so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
1420 timeout_set_flags(&so_sp->ssp_idleto, soidle, so,
1421 KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
1422 task_set(&so_sp->ssp_task, sotask, so);
1423
1424 so->so_sp = so_sp;
1425 }
1426 if (sosp->so_sp == NULL) {
1427 struct sosplice *so_sp;
1428
1429 so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
1430 timeout_set_flags(&so_sp->ssp_idleto, soidle, sosp,
1431 KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
1432 task_set(&so_sp->ssp_task, sotask, sosp);
1433
1434 sosp->so_sp = so_sp;
1435 }
1436 if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
1437 error = EBUSY;
1438 goto release;
1439 }
1440
1441 so->so_splicelen = 0;
1442 so->so_splicemax = max;
1443 if (tv)
1444 so->so_idletv = *tv;
1445 else
1446 timerclear(&so->so_idletv);
1447
1448 /*
1449 * To prevent sorwakeup() from calling somove() before this somove()
1450 * has finished, the socket buffers are not marked as spliced yet.
1451 */
1452
1453 /* Splice so and sosp together. */
1454 mtx_enter(&so->so_rcv.sb_mtx);
1455 mtx_enter(&sosp->so_snd.sb_mtx);
1456 so->so_sp->ssp_socket = sosp;
1457 sosp->so_sp->ssp_soback = so;
1458 mtx_leave(&sosp->so_snd.sb_mtx);
1459 mtx_leave(&so->so_rcv.sb_mtx);
1460
1461 sosplice_sounlock_pair(so, sosp);
1462 sbunlock(&sosp->so_snd);
1463
1464 if (somove(so, M_WAIT)) {
1465 mtx_enter(&so->so_rcv.sb_mtx);
1466 mtx_enter(&sosp->so_snd.sb_mtx);
1467 so->so_rcv.sb_flags |= SB_SPLICE;
1468 sosp->so_snd.sb_flags |= SB_SPLICE;
1469 mtx_leave(&sosp->so_snd.sb_mtx);
1470 mtx_leave(&so->so_rcv.sb_mtx);
1471 }
1472
1473 sbunlock(&so->so_rcv);
1474 FRELE(fp, curproc);
1475 return (0);
1476
1477 release:
1478 sosplice_sounlock_pair(so, sosp);
1479 sbunlock(&sosp->so_snd);
1480 sbunlock(&so->so_rcv);
1481 frele:
1482 FRELE(fp, curproc);
1483 return (error);
1484 }
1485
1486 void
1487 sounsplice(struct socket *so, struct socket *sosp, int freeing)
1488 {
1489 sbassertlocked(&so->so_rcv);
1490
1491 mtx_enter(&so->so_rcv.sb_mtx);
1492 mtx_enter(&sosp->so_snd.sb_mtx);
1493 so->so_rcv.sb_flags &= ~SB_SPLICE;
1494 sosp->so_snd.sb_flags &= ~SB_SPLICE;
1495 so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
1496 mtx_leave(&sosp->so_snd.sb_mtx);
1497 mtx_leave(&so->so_rcv.sb_mtx);
1498
1499 task_del(sosplice_taskq, &so->so_splicetask);
1500 timeout_del(&so->so_idleto);
1501
1502 /* Do not wake up a socket that is about to be freed. */
1503 if ((freeing & SOSP_FREEING_READ) == 0) {
1504 int readable;
1505
1506 solock_shared(so);
1507 mtx_enter(&so->so_rcv.sb_mtx);
1508 readable = soreadable(so);
1509 mtx_leave(&so->so_rcv.sb_mtx);
1510 if (readable)
1511 sorwakeup(so);
1512 sounlock_shared(so);
1513 }
1514 if ((freeing & SOSP_FREEING_WRITE) == 0) {
1515 solock_shared(sosp);
1516 if (sowriteable(sosp))
1517 sowwakeup(sosp);
1518 sounlock_shared(sosp);
1519 }
1520 }
1521
1522 void
1523 soidle(void *arg)
1524 {
1525 struct socket *so = arg;
1526
1527 sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
1528 if (so->so_rcv.sb_flags & SB_SPLICE) {
1529 struct socket *sosp;
1530
1531 WRITE_ONCE(so->so_error, ETIMEDOUT);
1532 sosp = soref(so->so_sp->ssp_socket);
1533 sounsplice(so, so->so_sp->ssp_socket, 0);
1534 sorele(sosp);
1535 }
1536 sbunlock(&so->so_rcv);
1537 }
1538
1539 void
1540 sotask(void *arg)
1541 {
1542 struct socket *so = arg;
1543 int doyield = 0;
1544
1545 sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
1546 if (so->so_rcv.sb_flags & SB_SPLICE) {
1547 if (so->so_proto->pr_flags & PR_WANTRCVD)
1548 doyield = 1;
1549 somove(so, M_DONTWAIT);
1550 }
1551 sbunlock(&so->so_rcv);
1552
1553 if (doyield) {
1554 /* Avoid userland starvation. */
1555 yield();
1556 }
1557 }
1558
1559 /*
1560 * Move data from receive buffer of spliced source socket to send
1561 * buffer of drain socket. Try to move as much as possible in one
1562 * big chunk. It is a TCP-only implementation.
1563 * A return value of 0 means splicing has finished, 1 means continue.
1564 */
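/*
 * Both sb_mtx mutexes are held while inspecting the buffers; they are
 * dropped around pru_rcvd() and pru_send() since those need the
 * shared socket lock.
 */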
1565 int
1566 somove(struct socket *so, int wait)
1567 {
1568 struct socket *sosp = so->so_sp->ssp_socket;
1569 struct mbuf *m, **mp, *nextrecord;
1570 u_long len, off, oobmark;
1571 long space;
1572 int error = 0, maxreached = 0, unsplice = 0;
1573 unsigned int rcvstate;
1574
1575 sbassertlocked(&so->so_rcv);
1576
1577 if (so->so_proto->pr_flags & PR_WANTRCVD)
1578 sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);
1579
1580 mtx_enter(&so->so_rcv.sb_mtx);
1581 mtx_enter(&sosp->so_snd.sb_mtx);
1582
1583 nextpkt:
1584 if ((error = READ_ONCE(so->so_error)))
1585 goto release;
1586 if (sosp->so_snd.sb_state & SS_CANTSENDMORE) {
1587 error = EPIPE;
1588 goto release;
1589 }
1590
1591 error = READ_ONCE(sosp->so_error);
1592 if (error) {
1593 if (error != ETIMEDOUT && error != EFBIG && error != ELOOP)
1594 goto release;
1595 error = 0;
1596 }
1597 if ((sosp->so_state & SS_ISCONNECTED) == 0)
1598 goto release;
1599
1600 /* Calculate how many bytes can be copied now. */
1601 len = so->so_rcv.sb_datacc;
1602 if (so->so_splicemax) {
1603 KASSERT(so->so_splicelen < so->so_splicemax);
1604 if (so->so_splicemax <= so->so_splicelen + len) {
1605 len = so->so_splicemax - so->so_splicelen;
1606 maxreached = 1;
1607 }
1608 }
1609 space = sbspace_locked(sosp, &sosp->so_snd);
1610 if (so->so_oobmark && so->so_oobmark < len &&
1611 so->so_oobmark < space + 1024)
1612 space += 1024;
1613 if (space <= 0) {
1614 maxreached = 0;
1615 goto release;
1616 }
1617 if (space < len) {
1618 maxreached = 0;
1619 if (space < sosp->so_snd.sb_lowat)
1620 goto release;
1621 len = space;
1622 }
1623 sosp->so_snd.sb_state |= SS_ISSENDING;
1624
1625 SBLASTRECORDCHK(&so->so_rcv, "somove 1");
1626 SBLASTMBUFCHK(&so->so_rcv, "somove 1");
1627 m = so->so_rcv.sb_mb;
1628 if (m == NULL)
1629 goto release;
1630 nextrecord = m->m_nextpkt;
1631
1632 /* Drop address and control information not used with splicing. */
1633 if (so->so_proto->pr_flags & PR_ADDR) {
1634 #ifdef DIAGNOSTIC
1635 if (m->m_type != MT_SONAME)
1636 panic("somove soname: so %p, so_type %d, m %p, "
1637 "m_type %d", so, so->so_type, m, m->m_type);
1638 #endif
1639 m = m->m_next;
1640 }
1641 while (m && m->m_type == MT_CONTROL)
1642 m = m->m_next;
1643 if (m == NULL) {
1644 sbdroprecord(so, &so->so_rcv);
1645 if (so->so_proto->pr_flags & PR_WANTRCVD) {
1646 mtx_leave(&sosp->so_snd.sb_mtx);
1647 mtx_leave(&so->so_rcv.sb_mtx);
1648 solock_shared(so);
1649 pru_rcvd(so);
1650 sounlock_shared(so);
1651 mtx_enter(&so->so_rcv.sb_mtx);
1652 mtx_enter(&sosp->so_snd.sb_mtx);
1653 }
1654 goto nextpkt;
1655 }
1656
1657 /*
1658 * By splicing sockets connected to localhost, userland might create
1659 * a loop. Dissolve the splice with an error if the counter detects a loop.
1660 *
1661 * If we deal with a looped broadcast/multicast packet, we bail out with
1662 * no error to suppress splice termination.
1663 */
1664 if ((m->m_flags & M_PKTHDR) &&
1665 ((m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) ||
1666 ((m->m_flags & M_LOOP) && (m->m_flags & (M_BCAST|M_MCAST))))) {
1667 error = ELOOP;
1668 goto release;
1669 }
1670
1671 if (so->so_proto->pr_flags & PR_ATOMIC) {
1672 if ((m->m_flags & M_PKTHDR) == 0)
1673 panic("somove !PKTHDR: so %p, so_type %d, m %p, "
1674 "m_type %d", so, so->so_type, m, m->m_type);
1675 if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) {
1676 error = EMSGSIZE;
1677 goto release;
1678 }
1679 if (len < m->m_pkthdr.len)
1680 goto release;
1681 if (m->m_pkthdr.len < len) {
1682 maxreached = 0;
1683 len = m->m_pkthdr.len;
1684 }
1685 /*
1686 * Throw away the name mbuf after it has been assured
1687 * that the whole first record can be processed.
1688 */
1689 m = so->so_rcv.sb_mb;
1690 sbfree(&so->so_rcv, m);
1691 so->so_rcv.sb_mb = m_free(m);
1692 sbsync(&so->so_rcv, nextrecord);
1693 }
1694 /*
1695 * Throw away the control mbufs after it has been assured
1696 * that the whole first record can be processed.
1697 */
1698 m = so->so_rcv.sb_mb;
1699 while (m && m->m_type == MT_CONTROL) {
1700 sbfree(&so->so_rcv, m);
1701 so->so_rcv.sb_mb = m_free(m);
1702 m = so->so_rcv.sb_mb;
1703 sbsync(&so->so_rcv, nextrecord);
1704 }
1705
1706 SBLASTRECORDCHK(&so->so_rcv, "somove 2");
1707 SBLASTMBUFCHK(&so->so_rcv, "somove 2");
1708
1709 /* Take at most len mbufs out of receive buffer. */
1710 for (off = 0, mp = &m; off <= len && *mp;
1711 off += (*mp)->m_len, mp = &(*mp)->m_next) {
1712 u_long size = len - off;
1713
1714 #ifdef DIAGNOSTIC
1715 if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER)
1716 panic("somove type: so %p, so_type %d, m %p, "
1717 "m_type %d", so, so->so_type, *mp, (*mp)->m_type);
1718 #endif
1719 if ((*mp)->m_len > size) {
1720 /*
1721 * Move only a partial mbuf at maximum splice length or
1722 * if the drain buffer is too small for this large mbuf.
1723 */
1724 if (!maxreached && sosp->so_snd.sb_datacc > 0) {
1725 len -= size;
1726 break;
1727 }
1728 *mp = m_copym(so->so_rcv.sb_mb, 0, size, wait);
1729 if (*mp == NULL) {
1730 len -= size;
1731 break;
1732 }
1733 so->so_rcv.sb_mb->m_data += size;
1734 so->so_rcv.sb_mb->m_len -= size;
1735 so->so_rcv.sb_cc -= size;
1736 so->so_rcv.sb_datacc -= size;
1737 } else {
1738 *mp = so->so_rcv.sb_mb;
1739 sbfree(&so->so_rcv, *mp);
1740 so->so_rcv.sb_mb = (*mp)->m_next;
1741 sbsync(&so->so_rcv, nextrecord);
1742 }
1743 }
1744 *mp = NULL;
1745
1746 SBLASTRECORDCHK(&so->so_rcv, "somove 3");
1747 SBLASTMBUFCHK(&so->so_rcv, "somove 3");
1748 SBCHECK(so, &so->so_rcv);
1749 if (m == NULL)
1750 goto release;
1751 m->m_nextpkt = NULL;
1752 if (m->m_flags & M_PKTHDR) {
1753 m_resethdr(m);
1754 m->m_pkthdr.len = len;
1755 }
1756
1757 /* The receive buffer shrank by len bytes; adjust oob. */
1758 rcvstate = so->so_rcv.sb_state;
1759 so->so_rcv.sb_state &= ~SS_RCVATMARK;
1760 oobmark = so->so_oobmark;
1761 so->so_oobmark = oobmark > len ? oobmark - len : 0;
1762 if (oobmark) {
1763 if (oobmark == len)
1764 so->so_rcv.sb_state |= SS_RCVATMARK;
1765 if (oobmark >= len)
1766 oobmark = 0;
1767 }
1768
1769 /* Send window update to source peer as receive buffer has changed. */
1770 if (so->so_proto->pr_flags & PR_WANTRCVD) {
1771 mtx_leave(&sosp->so_snd.sb_mtx);
1772 mtx_leave(&so->so_rcv.sb_mtx);
1773 solock_shared(so);
1774 pru_rcvd(so);
1775 sounlock_shared(so);
1776 mtx_enter(&so->so_rcv.sb_mtx);
1777 mtx_enter(&sosp->so_snd.sb_mtx);
1778 }
1779
1780 /*
1781 * Handle oob data. If any malloc fails, ignore error.
1782 * TCP urgent data is not very reliable anyway.
1783 */
1784 while (((rcvstate & SS_RCVATMARK) || oobmark) &&
1785 (so->so_options & SO_OOBINLINE)) {
1786 struct mbuf *o = NULL;
1787
1788 if (rcvstate & SS_RCVATMARK) {
1789 o = m_get(wait, MT_DATA);
1790 rcvstate &= ~SS_RCVATMARK;
1791 } else if (oobmark) {
1792 o = m_split(m, oobmark, wait);
1793 if (o) {
1794 mtx_leave(&sosp->so_snd.sb_mtx);
1795 mtx_leave(&so->so_rcv.sb_mtx);
1796 solock_shared(sosp);
1797 error = pru_send(sosp, m, NULL, NULL);
1798 sounlock_shared(sosp);
1799 mtx_enter(&so->so_rcv.sb_mtx);
1800 mtx_enter(&sosp->so_snd.sb_mtx);
1801
1802 if (error) {
1803 if (sosp->so_snd.sb_state &
1804 SS_CANTSENDMORE)
1805 error = EPIPE;
1806 m_freem(o);
1807 goto release;
1808 }
1809 len -= oobmark;
1810 so->so_splicelen += oobmark;
1811 m = o;
1812 o = m_get(wait, MT_DATA);
1813 }
1814 oobmark = 0;
1815 }
1816 if (o) {
1817 o->m_len = 1;
1818 *mtod(o, caddr_t) = *mtod(m, caddr_t);
1819
1820 mtx_leave(&sosp->so_snd.sb_mtx);
1821 mtx_leave(&so->so_rcv.sb_mtx);
1822 solock_shared(sosp);
1823 error = pru_sendoob(sosp, o, NULL, NULL);
1824 sounlock_shared(sosp);
1825 mtx_enter(&so->so_rcv.sb_mtx);
1826 mtx_enter(&sosp->so_snd.sb_mtx);
1827
1828 if (error) {
1829 if (sosp->so_snd.sb_state & SS_CANTSENDMORE)
1830 error = EPIPE;
1831 m_freem(m);
1832 goto release;
1833 }
1834 len -= 1;
1835 so->so_splicelen += 1;
1836 if (oobmark) {
1837 oobmark -= 1;
1838 if (oobmark == 0)
1839 rcvstate |= SS_RCVATMARK;
1840 }
1841 m_adj(m, 1);
1842 }
1843 }
1844
1845 /* Append all remaining data to drain socket. */
1846 if (so->so_rcv.sb_cc == 0 || maxreached)
1847 sosp->so_snd.sb_state &= ~SS_ISSENDING;
1848
1849 mtx_leave(&sosp->so_snd.sb_mtx);
1850 mtx_leave(&so->so_rcv.sb_mtx);
1851 solock_shared(sosp);
1852 error = pru_send(sosp, m, NULL, NULL);
1853 sounlock_shared(sosp);
1854 mtx_enter(&so->so_rcv.sb_mtx);
1855 mtx_enter(&sosp->so_snd.sb_mtx);
1856
1857 if (error) {
1858 if (sosp->so_snd.sb_state & SS_CANTSENDMORE ||
1859 sosp->so_pcb == NULL)
1860 error = EPIPE;
1861 goto release;
1862 }
1863 so->so_splicelen += len;
1864
1865 /* Move several packets if possible. */
1866 if (!maxreached && nextrecord)
1867 goto nextpkt;
1868
1869 release:
1870 sosp->so_snd.sb_state &= ~SS_ISSENDING;
1871
1872 if (!error && maxreached && so->so_splicemax == so->so_splicelen)
1873 error = EFBIG;
1874 if (error)
1875 WRITE_ONCE(so->so_error, error);
1876
1877 if (((so->so_rcv.sb_state & SS_CANTRCVMORE) &&
1878 so->so_rcv.sb_cc == 0) ||
1879 (sosp->so_snd.sb_state & SS_CANTSENDMORE) ||
1880 maxreached || error)
1881 unsplice = 1;
1882
1883 mtx_leave(&sosp->so_snd.sb_mtx);
1884 mtx_leave(&so->so_rcv.sb_mtx);
1885
1886 if (so->so_proto->pr_flags & PR_WANTRCVD)
1887 sbunlock(&so->so_snd);
1888
1889 if (unsplice) {
1890 soref(sosp);
1891 sounsplice(so, sosp, 0);
1892 sorele(sosp);
1893
1894 return (0);
1895 }
1896 if (timerisset(&so->so_idletv))
1897 timeout_add_tv(&so->so_idleto, &so->so_idletv);
1898 return (1);
1899 }
1900 #endif /* SOCKET_SPLICE */
1901
1902 void
1903 sorwakeup(struct socket *so)
1904 {
1905 #ifdef SOCKET_SPLICE
1906 if (so->so_proto->pr_flags & PR_SPLICE) {
1907 mtx_enter(&so->so_rcv.sb_mtx);
1908 if (so->so_rcv.sb_flags & SB_SPLICE)
1909 task_add(sosplice_taskq, &so->so_splicetask);
1910 if (isspliced(so)) {
1911 mtx_leave(&so->so_rcv.sb_mtx);
1912 return;
1913 }
1914 mtx_leave(&so->so_rcv.sb_mtx);
1915 }
1916 #endif
1917 sowakeup(so, &so->so_rcv);
1918 if (so->so_upcall)
1919 (*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
1920 }
1921
1922 void
1923 sowwakeup(struct socket *so)
1924 {
1925 #ifdef SOCKET_SPLICE
1926 if (so->so_proto->pr_flags & PR_SPLICE) {
1927 mtx_enter(&so->so_snd.sb_mtx);
1928 if (so->so_snd.sb_flags & SB_SPLICE)
1929 task_add(sosplice_taskq,
1930 &so->so_sp->ssp_soback->so_splicetask);
1931 if (issplicedback(so)) {
1932 mtx_leave(&so->so_snd.sb_mtx);
1933 return;
1934 }
1935 mtx_leave(&so->so_snd.sb_mtx);
1936 }
1937 #endif
1938 sowakeup(so, &so->so_snd);
1939 }
1940
1941 int
1942 sosetopt(struct socket *so, int level, int optname, struct mbuf *m)
1943 {
1944 int error = 0;
1945
1946 if (level != SOL_SOCKET) {
1947 if (so->so_proto->pr_ctloutput) {
1948 solock(so);
1949 error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
1950 level, optname, m);
1951 sounlock(so);
1952 return (error);
1953 }
1954 error = ENOPROTOOPT;
1955 } else {
1956 switch (optname) {
1957
1958 case SO_LINGER:
1959 if (m == NULL || m->m_len != sizeof (struct linger) ||
1960 mtod(m, struct linger *)->l_linger < 0 ||
1961 mtod(m, struct linger *)->l_linger > SHRT_MAX)
1962 return (EINVAL);
1963
1964 solock(so);
1965 so->so_linger = mtod(m, struct linger *)->l_linger;
1966 if (*mtod(m, int *))
1967 so->so_options |= optname;
1968 else
1969 so->so_options &= ~optname;
1970 sounlock(so);
1971
1972 break;
1973 case SO_BINDANY:
1974 if ((error = suser(curproc)) != 0) /* XXX */
1975 return (error);
1976 /* FALLTHROUGH */
1977
1978 case SO_DEBUG:
1979 case SO_KEEPALIVE:
1980 case SO_USELOOPBACK:
1981 case SO_BROADCAST:
1982 case SO_REUSEADDR:
1983 case SO_REUSEPORT:
1984 case SO_OOBINLINE:
1985 case SO_TIMESTAMP:
1986 case SO_ZEROIZE:
1987 if (m == NULL || m->m_len < sizeof (int))
1988 return (EINVAL);
1989
1990 solock(so);
1991 if (*mtod(m, int *))
1992 so->so_options |= optname;
1993 else
1994 so->so_options &= ~optname;
1995 sounlock(so);
1996
1997 break;
1998 case SO_DONTROUTE:
1999 if (m == NULL || m->m_len < sizeof (int))
2000 return (EINVAL);
2001 if (*mtod(m, int *))
2002 error = EOPNOTSUPP;
2003 break;
2004
2005 case SO_SNDBUF:
2006 case SO_RCVBUF:
2007 case SO_SNDLOWAT:
2008 case SO_RCVLOWAT:
2009 {
2010 struct sockbuf *sb = (optname == SO_SNDBUF ||
2011 optname == SO_SNDLOWAT ?
2012 &so->so_snd : &so->so_rcv);
2013 u_long cnt;
2014
2015 if (m == NULL || m->m_len < sizeof (int))
2016 return (EINVAL);
2017 cnt = *mtod(m, int *);
2018 if ((long)cnt <= 0)
2019 cnt = 1;
2020
2021 mtx_enter(&sb->sb_mtx);
2022 switch (optname) {
2023 case SO_SNDBUF:
2024 case SO_RCVBUF:
2025 if (sb->sb_state &
2026 (SS_CANTSENDMORE | SS_CANTRCVMORE)) {
2027 error = EINVAL;
2028 break;
2029 }
2030 if (sbcheckreserve(cnt, sb->sb_wat) ||
2031 sbreserve(so, sb, cnt)) {
2032 error = ENOBUFS;
2033 break;
2034 }
2035 sb->sb_wat = cnt;
2036 break;
2037 case SO_SNDLOWAT:
2038 case SO_RCVLOWAT:
2039 sb->sb_lowat = (cnt > sb->sb_hiwat) ?
2040 sb->sb_hiwat : cnt;
2041 break;
2042 }
2043 mtx_leave(&sb->sb_mtx);
2044
2045 break;
2046 }
2047
2048 case SO_SNDTIMEO:
2049 case SO_RCVTIMEO:
2050 {
2051 struct sockbuf *sb = (optname == SO_SNDTIMEO ?
2052 &so->so_snd : &so->so_rcv);
2053 struct timeval tv;
2054 uint64_t nsecs;
2055
2056 if (m == NULL || m->m_len < sizeof (tv))
2057 return (EINVAL);
2058 memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
2059 if (!timerisvalid(&tv))
2060 return (EINVAL);
2061 nsecs = TIMEVAL_TO_NSEC(&tv);
2062 if (nsecs == UINT64_MAX)
2063 return (EDOM);
2064 if (nsecs == 0)
2065 nsecs = INFSLP;
2066
2067 mtx_enter(&sb->sb_mtx);
2068 sb->sb_timeo_nsecs = nsecs;
2069 mtx_leave(&sb->sb_mtx);
2070 break;
2071 }
2072
2073 case SO_RTABLE:
2074 if (so->so_proto->pr_domain &&
2075 so->so_proto->pr_domain->dom_protosw &&
2076 so->so_proto->pr_ctloutput) {
2077 const struct domain *dom =
2078 so->so_proto->pr_domain;
2079
2080 level = dom->dom_protosw->pr_protocol;
2081 solock(so);
2082 error = (*so->so_proto->pr_ctloutput)
2083 (PRCO_SETOPT, so, level, optname, m);
2084 sounlock(so);
2085 } else
2086 error = ENOPROTOOPT;
2087 break;
2088 #ifdef SOCKET_SPLICE
2089 case SO_SPLICE:
2090 if (m == NULL) {
2091 error = sosplice(so, -1, 0, NULL);
2092 } else if (m->m_len < sizeof(int)) {
2093 error = EINVAL;
2094 } else if (m->m_len < sizeof(struct splice)) {
2095 error = sosplice(so, *mtod(m, int *), 0, NULL);
2096 } else {
2097 error = sosplice(so,
2098 mtod(m, struct splice *)->sp_fd,
2099 mtod(m, struct splice *)->sp_max,
2100 &mtod(m, struct splice *)->sp_idle);
2101 }
2102 break;
2103 #endif /* SOCKET_SPLICE */
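		/*
		 * A userland sketch of the forms accepted above, assuming
		 * a source socket s and a drain descriptor spfd (struct
		 * splice is declared in <sys/socket.h>):
		 *
		 *	struct splice sp = { .sp_fd = spfd, .sp_max = 0 };
		 *	timerclear(&sp.sp_idle);
		 *	setsockopt(s, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp));
		 *
		 *	setsockopt(s, SOL_SOCKET, SO_SPLICE, &spfd,
		 *	    sizeof(int));
		 *
		 *	int off = -1;	* dissolve an existing splice *
		 *	setsockopt(s, SOL_SOCKET, SO_SPLICE, &off, sizeof(off));
		 */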
2104
2105 default:
2106 error = ENOPROTOOPT;
2107 break;
2108 }
2109 }
2110
2111 return (error);
2112 }
2113
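/*
 * Get a socket option.  The caller provides an mbuf that is filled in
 * with the option value; most SOL_SOCKET options fit in an int.  A
 * userland sketch, assuming socket s:
 *
 *	int val;
 *	socklen_t len = sizeof(val);
 *	if (getsockopt(s, SOL_SOCKET, SO_ERROR, &val, &len) == 0 && val)
 *		warnc(val, "socket error");
 */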
2114 int
2115 sogetopt(struct socket *so, int level, int optname, struct mbuf *m)
2116 {
2117 int error = 0;
2118
2119 if (level != SOL_SOCKET) {
2120 if (so->so_proto->pr_ctloutput) {
2121 m->m_len = 0;
2122
2123 solock(so);
2124 error = (*so->so_proto->pr_ctloutput)(PRCO_GETOPT, so,
2125 level, optname, m);
2126 sounlock(so);
2127 return (error);
2128 } else
2129 return (ENOPROTOOPT);
2130 } else {
2131 m->m_len = sizeof (int);
2132
2133 switch (optname) {
2134
2135 case SO_LINGER:
2136 m->m_len = sizeof (struct linger);
2137 solock_shared(so);
2138 mtod(m, struct linger *)->l_onoff =
2139 so->so_options & SO_LINGER;
2140 mtod(m, struct linger *)->l_linger = so->so_linger;
2141 sounlock_shared(so);
2142 break;
2143
2144 case SO_BINDANY:
2145 case SO_USELOOPBACK:
2146 case SO_DEBUG:
2147 case SO_KEEPALIVE:
2148 case SO_REUSEADDR:
2149 case SO_REUSEPORT:
2150 case SO_BROADCAST:
2151 case SO_OOBINLINE:
2152 case SO_ACCEPTCONN:
2153 case SO_TIMESTAMP:
2154 case SO_ZEROIZE:
2155 *mtod(m, int *) = so->so_options & optname;
2156 break;
2157
2158 case SO_DONTROUTE:
2159 *mtod(m, int *) = 0;
2160 break;
2161
2162 case SO_TYPE:
2163 *mtod(m, int *) = so->so_type;
2164 break;
2165
2166 case SO_ERROR:
2167 solock(so);
2168 *mtod(m, int *) = so->so_error;
2169 so->so_error = 0;
2170 sounlock(so);
2171
2172 break;
2173
2174 case SO_DOMAIN:
2175 *mtod(m, int *) = so->so_proto->pr_domain->dom_family;
2176 break;
2177
2178 case SO_PROTOCOL:
2179 *mtod(m, int *) = so->so_proto->pr_protocol;
2180 break;
2181
2182 case SO_SNDBUF:
2183 *mtod(m, int *) = so->so_snd.sb_hiwat;
2184 break;
2185
2186 case SO_RCVBUF:
2187 *mtod(m, int *) = so->so_rcv.sb_hiwat;
2188 break;
2189
2190 case SO_SNDLOWAT:
2191 *mtod(m, int *) = so->so_snd.sb_lowat;
2192 break;
2193
2194 case SO_RCVLOWAT:
2195 *mtod(m, int *) = so->so_rcv.sb_lowat;
2196 break;
2197
2198 case SO_SNDTIMEO:
2199 case SO_RCVTIMEO:
2200 {
2201 struct sockbuf *sb = (optname == SO_SNDTIMEO ?
2202 &so->so_snd : &so->so_rcv);
2203 struct timeval tv;
2204 uint64_t nsecs;
2205
2206 mtx_enter(&sb->sb_mtx);
2207 nsecs = sb->sb_timeo_nsecs;
2208 mtx_leave(&sb->sb_mtx);
2209
2210 m->m_len = sizeof(struct timeval);
2211 memset(&tv, 0, sizeof(tv));
2212 if (nsecs != INFSLP)
2213 NSEC_TO_TIMEVAL(nsecs, &tv);
2214 memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
2215 break;
2216 }
2217
2218 case SO_RTABLE:
2219 if (so->so_proto->pr_domain &&
2220 so->so_proto->pr_domain->dom_protosw &&
2221 so->so_proto->pr_ctloutput) {
2222 const struct domain *dom =
2223 so->so_proto->pr_domain;
2224
2225 level = dom->dom_protosw->pr_protocol;
2226 solock(so);
2227 error = (*so->so_proto->pr_ctloutput)
2228 (PRCO_GETOPT, so, level, optname, m);
2229 sounlock(so);
2230 if (error)
2231 return (error);
2232 break;
2233 }
2234 return (ENOPROTOOPT);
2235
2236 #ifdef SOCKET_SPLICE
2237 case SO_SPLICE:
2238 {
2239 off_t len;
2240
2241 m->m_len = sizeof(off_t);
2242 solock_shared(so);
2243 len = so->so_sp ? so->so_sp->ssp_len : 0;
2244 sounlock_shared(so);
2245 memcpy(mtod(m, off_t *), &len, sizeof(off_t));
2246 break;
2247 }
2248 #endif /* SOCKET_SPLICE */
2249
2250 case SO_PEERCRED:
2251 if (so->so_proto->pr_protocol == AF_UNIX) {
2252 struct unpcb *unp = sotounpcb(so);
2253
2254 solock(so);
2255 if (unp->unp_flags & UNP_FEIDS) {
2256 m->m_len = sizeof(unp->unp_connid);
2257 memcpy(mtod(m, caddr_t),
2258 &(unp->unp_connid), m->m_len);
2259 sounlock(so);
2260 break;
2261 }
2262 sounlock(so);
2263
2264 return (ENOTCONN);
2265 }
2266 return (EOPNOTSUPP);
2267
2268 default:
2269 return (ENOPROTOOPT);
2270 }
2271 return (0);
2272 }
2273 }
2274
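/*
 * Out-of-band data has arrived: signal the owning process or process
 * group with SIGURG and notify the kqueue filters hanging off the
 * receive buffer.
 */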
2275 void
2276 sohasoutofband(struct socket *so)
2277 {
2278 pgsigio(&so->so_sigio, SIGURG, 0);
2279 knote(&so->so_rcv.sb_klist, 0);
2280 }
2281
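/*
 * The kqueue filters run without the kernel lock, so they take the lock
 * matching the socket's domain: the shared net lock for inet sockets,
 * the per-socket rwlock otherwise, plus the sockbuf mutex in both cases.
 */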
2282 void
2283 sofilt_lock(struct socket *so, struct sockbuf *sb)
2284 {
2285 switch (so->so_proto->pr_domain->dom_family) {
2286 case PF_INET:
2287 case PF_INET6:
2288 NET_LOCK_SHARED();
2289 break;
2290 default:
2291 rw_enter_write(&so->so_lock);
2292 break;
2293 }
2294
2295 mtx_enter(&sb->sb_mtx);
2296 }
2297
2298 void
2299 sofilt_unlock(struct socket *so, struct sockbuf *sb)
2300 {
2301 mtx_leave(&sb->sb_mtx);
2302
2303 switch (so->so_proto->pr_domain->dom_family) {
2304 case PF_INET:
2305 case PF_INET6:
2306 NET_UNLOCK_SHARED();
2307 break;
2308 default:
2309 rw_exit_write(&so->so_lock);
2310 break;
2311 }
2312 }
2313
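/*
 * Attach a knote to a socket: read and except filters hang off the
 * receive buffer's klist, write filters off the send buffer's.
 */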
2314 int
2315 soo_kqfilter(struct file *fp, struct knote *kn)
2316 {
2317 struct socket *so = kn->kn_fp->f_data;
2318 struct sockbuf *sb;
2319
2320 switch (kn->kn_filter) {
2321 case EVFILT_READ:
2322 kn->kn_fop = &soread_filtops;
2323 sb = &so->so_rcv;
2324 break;
2325 case EVFILT_WRITE:
2326 kn->kn_fop = &sowrite_filtops;
2327 sb = &so->so_snd;
2328 break;
2329 case EVFILT_EXCEPT:
2330 kn->kn_fop = &soexcept_filtops;
2331 sb = &so->so_rcv;
2332 break;
2333 default:
2334 return (EINVAL);
2335 }
2336
2337 klist_insert(&sb->sb_klist, kn);
2338
2339 return (0);
2340 }
2341
2342 void
2343 filt_sordetach(struct knote *kn)
2344 {
2345 struct socket *so = kn->kn_fp->f_data;
2346
2347 klist_remove(&so->so_rcv.sb_klist, kn);
2348 }
2349
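/*
 * Read filter.  For listening sockets the event data is the length of
 * the accept queue; otherwise it is the receive buffer byte count,
 * compared against NOTE_LOWAT or the buffer's low water mark.  A
 * spliced socket never reports readable, as the splice consumes the
 * data before userland can see it.
 */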
2350 int
2351 filt_soread(struct knote *kn, long hint)
2352 {
2353 struct socket *so = kn->kn_fp->f_data;
2354 u_int state = READ_ONCE(so->so_state);
2355 u_int error = READ_ONCE(so->so_error);
2356 int rv = 0;
2357
2358 MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
2359
2360 if (so->so_options & SO_ACCEPTCONN) {
2361 short qlen = READ_ONCE(so->so_qlen);
2362
2363 soassertlocked_readonly(so);
2364
2365 kn->kn_data = qlen;
2366 rv = (kn->kn_data != 0);
2367
2368 if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) {
2369 if (state & SS_ISDISCONNECTED) {
2370 kn->kn_flags |= __EV_HUP;
2371 rv = 1;
2372 } else {
2373 rv = qlen || soreadable(so);
2374 }
2375 }
2376
2377 return rv;
2378 }
2379
2380 kn->kn_data = so->so_rcv.sb_cc;
2381 #ifdef SOCKET_SPLICE
2382 if (isspliced(so)) {
2383 rv = 0;
2384 } else
2385 #endif /* SOCKET_SPLICE */
2386 if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
2387 kn->kn_flags |= EV_EOF;
2388 if (kn->kn_flags & __EV_POLL) {
2389 if (state & SS_ISDISCONNECTED)
2390 kn->kn_flags |= __EV_HUP;
2391 }
2392 kn->kn_fflags = error;
2393 rv = 1;
2394 } else if (error) {
2395 rv = 1;
2396 } else if (kn->kn_sfflags & NOTE_LOWAT) {
2397 rv = (kn->kn_data >= kn->kn_sdata);
2398 } else {
2399 rv = (kn->kn_data >= so->so_rcv.sb_lowat);
2400 }
2401
2402 return rv;
2403 }
2404
2405 void
2406 filt_sowdetach(struct knote *kn)
2407 {
2408 struct socket *so = kn->kn_fp->f_data;
2409
2410 klist_remove(&so->so_snd.sb_klist, kn);
2411 }
2412
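/*
 * Write filter: the event data is the space left in the send buffer.
 * An unconnected socket on a connection-oriented protocol is never
 * writable; EOF and pending errors always trigger.
 */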
2413 int
2414 filt_sowrite(struct knote *kn, long hint)
2415 {
2416 struct socket *so = kn->kn_fp->f_data;
2417 u_int state = READ_ONCE(so->so_state);
2418 u_int error = READ_ONCE(so->so_error);
2419 int rv;
2420
2421 MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx);
2422
2423 kn->kn_data = sbspace_locked(so, &so->so_snd);
2424 if (so->so_snd.sb_state & SS_CANTSENDMORE) {
2425 kn->kn_flags |= EV_EOF;
2426 if (kn->kn_flags & __EV_POLL) {
2427 if (state & SS_ISDISCONNECTED)
2428 kn->kn_flags |= __EV_HUP;
2429 }
2430 kn->kn_fflags = error;
2431 rv = 1;
2432 } else if (error) {
2433 rv = 1;
2434 } else if (((state & SS_ISCONNECTED) == 0) &&
2435 (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
2436 rv = 0;
2437 } else if (kn->kn_sfflags & NOTE_LOWAT) {
2438 rv = (kn->kn_data >= kn->kn_sdata);
2439 } else {
2440 rv = (kn->kn_data >= so->so_snd.sb_lowat);
2441 }
2442
2443 return (rv);
2444 }
2445
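/*
 * Except filter, used for out-of-band data (NOTE_OOB) and to deliver
 * the hangup indication to poll(2) and select(2) on disconnected
 * sockets.
 */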
2446 int
2447 filt_soexcept(struct knote *kn, long hint)
2448 {
2449 struct socket *so = kn->kn_fp->f_data;
2450 int rv = 0;
2451
2452 MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
2453
2454 #ifdef SOCKET_SPLICE
2455 if (isspliced(so)) {
2456 rv = 0;
2457 } else
2458 #endif /* SOCKET_SPLICE */
2459 if (kn->kn_sfflags & NOTE_OOB) {
2460 if (so->so_oobmark || (so->so_rcv.sb_state & SS_RCVATMARK)) {
2461 kn->kn_fflags |= NOTE_OOB;
2462 kn->kn_data -= so->so_oobmark;
2463 rv = 1;
2464 }
2465 }
2466
2467 if (kn->kn_flags & __EV_POLL) {
2468 u_int state = READ_ONCE(so->so_state);
2469
2470 if (state & SS_ISDISCONNECTED) {
2471 kn->kn_flags |= __EV_HUP;
2472 rv = 1;
2473 }
2474 }
2475
2476 return rv;
2477 }
2478
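/*
 * The f_modify/f_process wrappers below serialize knote updates with
 * the locks taken by the event filters themselves, see sofilt_lock().
 */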
2479 int
2480 filt_sowmodify(struct kevent *kev, struct knote *kn)
2481 {
2482 struct socket *so = kn->kn_fp->f_data;
2483 int rv;
2484
2485 sofilt_lock(so, &so->so_snd);
2486 rv = knote_modify(kev, kn);
2487 sofilt_unlock(so, &so->so_snd);
2488
2489 return (rv);
2490 }
2491
2492 int
2493 filt_sowprocess(struct knote *kn, struct kevent *kev)
2494 {
2495 struct socket *so = kn->kn_fp->f_data;
2496 int rv;
2497
2498 sofilt_lock(so, &so->so_snd);
2499 rv = knote_process(kn, kev);
2500 sofilt_unlock(so, &so->so_snd);
2501
2502 return (rv);
2503 }
2504
2505 int
2506 filt_sormodify(struct kevent *kev, struct knote *kn)
2507 {
2508 struct socket *so = kn->kn_fp->f_data;
2509 int rv;
2510
2511 sofilt_lock(so, &so->so_rcv);
2512 rv = knote_modify(kev, kn);
2513 sofilt_unlock(so, &so->so_rcv);
2514
2515 return (rv);
2516 }
2517
2518 int
2519 filt_sorprocess(struct knote *kn, struct kevent *kev)
2520 {
2521 struct socket *so = kn->kn_fp->f_data;
2522 int rv;
2523
2524 sofilt_lock(so, &so->so_rcv);
2525 rv = knote_process(kn, kev);
2526 sofilt_unlock(so, &so->so_rcv);
2527
2528 return (rv);
2529 }
2530
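/* ddb(4) helpers to dump a socket and its buffers. */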
2531 #ifdef DDB
2532 void
2533 sobuf_print(struct sockbuf *,
2534 int (*)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))));
2535
2536 void
2537 sobuf_print(struct sockbuf *sb,
2538 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
2539 {
2540 (*pr)("\tsb_cc: %lu\n", sb->sb_cc);
2541 (*pr)("\tsb_datacc: %lu\n", sb->sb_datacc);
2542 (*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat);
2543 (*pr)("\tsb_wat: %lu\n", sb->sb_wat);
2544 (*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt);
2545 (*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax);
2546 (*pr)("\tsb_lowat: %ld\n", sb->sb_lowat);
2547 (*pr)("\tsb_mb: %p\n", sb->sb_mb);
2548 (*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
2549 (*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
2550 (*pr)("\tsb_flags: %04x\n", sb->sb_flags);
2551 (*pr)("\tsb_state: %04x\n", sb->sb_state);
2552 (*pr)("\tsb_timeo_nsecs: %llu\n", sb->sb_timeo_nsecs);
2553 }
2554
2555 void
2556 so_print(void *v,
2557 int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
2558 {
2559 struct socket *so = v;
2560
2561 (*pr)("socket %p\n", so);
2562 (*pr)("so_type: %i\n", so->so_type);
2563 (*pr)("so_options: 0x%04x\n", so->so_options); /* %b */
2564 (*pr)("so_linger: %i\n", so->so_linger);
2565 (*pr)("so_state: 0x%04x\n", so->so_state);
2566 (*pr)("so_pcb: %p\n", so->so_pcb);
2567 (*pr)("so_proto: %p\n", so->so_proto);
2568 (*pr)("so_sigio: %p\n", so->so_sigio.sir_sigio);
2569
2570 (*pr)("so_head: %p\n", so->so_head);
2571 (*pr)("so_onq: %p\n", so->so_onq);
2572 (*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0));
2573 (*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q));
2574 (*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe));
2575 (*pr)("so_q0len: %i\n", so->so_q0len);
2576 (*pr)("so_qlen: %i\n", so->so_qlen);
2577 (*pr)("so_qlimit: %i\n", so->so_qlimit);
2578 (*pr)("so_timeo: %i\n", so->so_timeo);
2579 (*pr)("so_obmark: %lu\n", so->so_oobmark);
2580
2581 (*pr)("so_sp: %p\n", so->so_sp);
2582 if (so->so_sp != NULL) {
2583 (*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket);
2584 (*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback);
2585 (*pr)("\tssp_len: %lld\n",
2586 (unsigned long long)so->so_sp->ssp_len);
2587 (*pr)("\tssp_max: %lld\n",
2588 (unsigned long long)so->so_sp->ssp_max);
2589 (*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
2590 so->so_sp->ssp_idletv.tv_usec);
2591 (*pr)("\tssp_idleto: %spending (@%i)\n",
2592 timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
2593 so->so_sp->ssp_idleto.to_time);
2594 }
2595
2596 (*pr)("so_rcv:\n");
2597 sobuf_print(&so->so_rcv, pr);
2598 (*pr)("so_snd:\n");
2599 sobuf_print(&so->so_snd, pr);
2600
2601 (*pr)("so_upcall: %p so_upcallarg: %p\n",
2602 so->so_upcall, so->so_upcallarg);
2603
2604 (*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid);
2605 (*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid);
2606 (*pr)("so_cpid: %d\n", so->so_cpid);
2607 }
2608 #endif
2609