/*	$OpenBSD: uipc_usrreq.c,v 1.214 2025/01/25 22:06:41 bluhm Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/refcnt.h>

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global data and struct members:
 *	I	immutable after creation
 *	D	unp_df_lock
 *	G	unp_gc_lock
 *	M	unp_ino_mtx
 *	R	unp_rights_mtx
 *	a	atomic
 *	s	socket lock
 */

struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");

struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] */
};
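
/*
 * The deferral record and its trailing array share one allocation;
 * unp_discard() below does, in effect:
 *
 *	defer = malloc(sizeof(*defer) + sizeof(struct fdpass) * nfds,
 *	    M_TEMP, M_WAITOK);
 *	defer->ud_n = nfds;
 */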

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_remove_gcrefs(struct fdpass *, int);
void	unp_restore_gcrefs(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
static inline void unp_ref(struct unpcb *);
static inline void unp_rele(struct unpcb *);
struct socket *unp_solock_peer(struct socket *);

struct pool unpcb_pool;
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [G] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
	LIST_HEAD_INITIALIZER(unp_head);
/* [D] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
	SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [M] prototype for fake inode numbers */
int	unp_rights;	/* [R] file descriptors in flight */
int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [G] GC task currently running */

const struct pr_usrreqs uipc_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_accept	= uipc_accept,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_shutdown,
	.pru_rcvd	= uipc_rcvd,
	.pru_send	= uipc_send,
	.pru_abort	= uipc_abort,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

const struct pr_usrreqs uipc_dgram_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_dgram_shutdown,
	.pru_send	= uipc_dgram_send,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

static inline void
unp_ref(struct unpcb *unp)
{
	refcnt_take(&unp->unp_refcnt);
}

static inline void
unp_rele(struct unpcb *unp)
{
	refcnt_rele_wake(&unp->unp_refcnt);
}

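/*
 * Lock both a socket and its peer, observing a global order: the
 * lower-addressed socket is locked first.  When `so' has the higher
 * address, its lock is dropped and retaken, which opens a window in
 * which a datagram peer may be swapped out from under us; the
 * unp_ref()/unp_rele() pair keeps `unp2' alive across that window.
 * A caller already holding solock(so) uses it along these lines
 * (sketch, mirroring uipc_accept() and uipc_peeraddr() below):
 *
 *	so2 = unp_solock_peer(so);
 *	...use the peer...
 *	if (so2 != NULL && so2 != so)
 *		sounlock(so2);
 */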
struct socket *
unp_solock_peer(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;

	unp = so->so_pcb;

again:
	if ((unp2 = unp->unp_conn) == NULL)
		return NULL;

	so2 = unp2->unp_socket;

	if (so < so2)
		solock(so2);
	else if (so > so2) {
		unp_ref(unp2);
		sounlock(so);
		solock(so2);
		solock(so);

		/* Datagram socket could be reconnected due to re-lock. */
		if (unp->unp_conn != unp2) {
			sounlock(so2);
			unp_rele(unp2);
			goto again;
		}

		unp_rele(unp2);
	}

	return so2;
}

void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	32768
u_int	unpst_sendspace = PIPSIZ;	/* [a] */
u_int	unpst_recvspace = PIPSIZ;	/* [a] */
u_int	unpsq_sendspace = PIPSIZ;	/* [a] */
u_int	unpsq_recvspace = PIPSIZ;	/* [a] */
u_int	unpdg_sendspace = 8192;		/* [a] really max datagram size */
u_int	unpdg_recvspace = PIPSIZ;	/* [a] */

const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};

int
uipc_attach(struct socket *so, int proto, int wait)
{
	struct unpcb *unp;
	int error;

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so,
			    atomic_load_int(&unpst_sendspace),
			    atomic_load_int(&unpst_recvspace));
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so,
			    atomic_load_int(&unpsq_sendspace),
			    atomic_load_int(&unpsq_recvspace));
			break;

		case SOCK_DGRAM:
			error = soreserve(so,
			    atomic_load_int(&unpdg_sendspace),
			    atomic_load_int(&unpdg_recvspace));
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	refcnt_init(&unp->unp_refcnt);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	return (0);
}

int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

int
uipc_bind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `solock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket);

	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_CREATE;

	KERNEL_LOCK();
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	vattr_null(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

int
uipc_listen(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode == NULL)
		return (EINVAL);
	return (0);
}

int
uipc_connect(struct socket *so, struct mbuf *nam)
{
	return unp_connect(so, nam, curproc);
}

int
uipc_accept(struct socket *so, struct mbuf *nam)
{
	struct socket *so2;
	struct unpcb *unp = sotounpcb(so);

	/*
	 * Pass back name of connected socket, if it was bound and
	 * we are still connected (our peer may have closed already!).
	 */
	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);

	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_disconnect(unp);
	return (0);
}

int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	socantsendmore(so);

	if (unp->unp_conn != NULL) {
		so2 = unp->unp_conn->unp_socket;
		socantrcvmore(so2);
	}

	return (0);
}

int
uipc_dgram_shutdown(struct socket *so)
{
	socantsendmore(so);
	return (0);
}

void
uipc_rcvd(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp->unp_conn == NULL)
		return;
	so2 = unp->unp_conn->unp_socket;

	/*
	 * Adjust backpressure on sender
	 * and wakeup any waiting to write.
	 */
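	/*
	 * Lock order: both here and in uipc_send() the receive-side
	 * `sb_mtx' is entered before the send-side `sb_mtx', keeping
	 * the two counter-mirroring paths on a consistent order.
	 */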
	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&so2->so_snd.sb_mtx);
	so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
	so2->so_snd.sb_cc = so->so_rcv.sb_cc;
	mtx_leave(&so2->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);
	sowwakeup(so2);
}

int
uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int error = 0, dowakeup = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	/*
	 * We hold both solock() and `sb_mtx' mutex while modifying
	 * SS_CANTSENDMORE flag. solock() is enough to check it.
	 */
	if (so->so_snd.sb_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto dispose;
	}
	if (unp->unp_conn == NULL) {
		error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	/*
	 * Send to paired receive port, and then raise
	 * send buffer counts to maintain backpressure.
	 * Wake up readers.
	 */
	/*
	 * sbappend*() should be serialized together
	 * with so_snd modification.
	 */
	mtx_enter(&so2->so_rcv.sb_mtx);
	mtx_enter(&so->so_snd.sb_mtx);
	if (control) {
		if (sbappendcontrol(so2, &so2->so_rcv, m, control)) {
			control = NULL;
		} else {
			mtx_leave(&so->so_snd.sb_mtx);
			mtx_leave(&so2->so_rcv.sb_mtx);
			error = ENOBUFS;
			goto dispose;
		}
	} else if (so->so_type == SOCK_SEQPACKET)
		sbappendrecord(so2, &so2->so_rcv, m);
	else
		sbappend(so2, &so2->so_rcv, m);
	so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
	so->so_snd.sb_cc = so2->so_rcv.sb_cc;
	if (so2->so_rcv.sb_cc > 0)
		dowakeup = 1;
	mtx_leave(&so->so_snd.sb_mtx);
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);

	m = NULL;

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

int
uipc_dgram_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	const struct sockaddr *from;
	int error = 0, dowakeup = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	if (nam) {
		if (unp->unp_conn) {
			error = EISCONN;
			goto dispose;
		}
		error = unp_connect(so, nam, curproc);
		if (error)
			goto dispose;
	}

	if (unp->unp_conn == NULL) {
		if (nam != NULL)
			error = ECONNREFUSED;
		else
			error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	if (unp->unp_addr)
		from = mtod(unp->unp_addr, struct sockaddr *);
	else
		from = &sun_noname;

	mtx_enter(&so2->so_rcv.sb_mtx);
	if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
		dowakeup = 1;
		m = NULL;
		control = NULL;
	} else
		error = ENOBUFS;
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);
	if (nam)
		unp_disconnect(unp);

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

void
uipc_abort(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_detach(unp);
	sofree(so, 1);
}

int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp = sotounpcb(so);

	sb->st_blksize = so->so_snd.sb_hiwat;
	sb->st_dev = NODEV;
	mtx_enter(&unp_ino_mtx);
	if (unp->unp_ino == 0)
		unp->unp_ino = unp_ino++;
	mtx_leave(&unp_ino_mtx);
	sb->st_atim.tv_sec =
	    sb->st_mtim.tv_sec =
	    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
	sb->st_atim.tv_nsec =
	    sb->st_mtim.tv_nsec =
	    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
	sb->st_ino = unp->unp_ino;

	return (0);
}

int
uipc_sockaddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);

	uipc_setaddr(unp, nam);
	return (0);
}

int
uipc_peeraddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);
	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so), *unp2;
	int error;

	if ((error = unp_connect2(so, so2)))
		return (error);

	unp->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp->unp_connid.pid = curproc->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDS;
	unp2 = sotounpcb(so2);
	unp2->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp2->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp2->unp_connid.pid = curproc->p_p->ps_pid;
	unp2->unp_flags |= UNP_FEIDS;

	return (0);
}

int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}
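
/*
 * These knobs surface under the net.unix MIB; illustrative
 * invocations (not taken from this file) would look like:
 *
 *	$ sysctl net.unix.stream.recvspace
 *	# sysctl net.unix.dgram.sendspace=16384
 *
 * net.unix.inflight and net.unix.deferred are read-only counters;
 * writes fall through to sysctl_rdint() above and are rejected.
 */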

void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;
	struct unpcb *unp2;

	unp->unp_vnode = NULL;

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		/* Enforce `i_lock' -> solock() lock order. */
		sounlock(so);
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
		solock(so);
	}

	if (unp->unp_conn != NULL) {
		/*
		 * Datagram socket could be connected to itself.
		 * Such socket will be disconnected here.
		 */
		unp_disconnect(unp);
	}

	while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) {
		struct socket *so2 = unp2->unp_socket;

		if (so < so2)
			solock(so2);
		else {
			unp_ref(unp2);
			sounlock(so);
			solock(so2);
			solock(so);

			if (unp2->unp_conn != unp) {
				/* `unp2' was disconnected due to re-lock. */
				sounlock(so2);
				unp_rele(unp2);
				continue;
			}

			unp_rele(unp2);
		}

		unp2->unp_conn = NULL;
		SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref);
		so2->so_error = ECONNRESET;
		so2->so_state &= ~SS_ISCONNECTED;

		sounlock(so2);
	}

	sounlock(so);
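	/*
	 * Wait for any unp_solock_peer() caller that still holds a
	 * reference on this PCB to drain out.  The socket lock must
	 * be released first: such a caller may need to retake it
	 * before dropping its reference.
	 */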
	refcnt_finalize(&unp->unp_refcnt, "unpfinal");
	solock(so);

	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}

int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_WRITE;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `solock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		solock(so2);

		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, M_WAIT)) == NULL) {
			sounlock(so2);
			error = ECONNREFUSED;
			goto put;
		}

		/*
		 * Since `so2' is protected by vnode(9) lock, `so3'
		 * can't be PRU_ABORT'ed here.
		 */
		sounlock(so2);
		sounlock(so3);
		solock_pair(so, so3);

		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);

		/*
		 * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag
		 * are immutable since we set them in uipc_bind().
		 */
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;

		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}

		so2 = so3;
	} else {
		if (so2 != so)
			solock_pair(so, so2);
		else
			solock(so);
	}

	error = unp_connect2(so, so2);

	sounlock(so);

	/*
	 * `so2' can't be PRU_ABORT'ed concurrently
	 */
	if (so2 != so)
		sounlock(so2);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by a concurrent thread
	 * while `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}

int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	soassertlocked(so);
	soassertlocked(so2);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

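/*
 * Sever the connection in both directions.  For stream and seqpacket
 * sockets the mirrored so_snd counters maintained by uipc_send() and
 * uipc_rcvd() are zeroed on both ends, so neither side keeps stale
 * backpressure state after the disconnect.
 */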
void
unp_disconnect(struct unpcb *unp)
{
	struct socket *so2;
	struct unpcb *unp2;

	if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
		return;

	unp2 = unp->unp_conn;
	unp->unp_conn = NULL;

	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}

	if (so2 != unp->unp_socket)
		sounlock(so2);
}

static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

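/*
 * Turn an SCM_RIGHTS control message carrying kernel `struct fdpass'
 * records back into integer descriptors in the receiving process:
 * table slots are allocated in a first pass (and backed out wholesale
 * on failure), the gc bookkeeping is settled in a second pass, and
 * only then is the fdpass array overwritten in place with the ints,
 * which shrinks the message since an int is no larger than a
 * struct fdpass.
 */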
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient should be able to see the descriptors. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.  We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Keep `fdp' locked to prevent concurrent close() of just
	 * inserted descriptors. Such descriptors could have the only
	 * `f_count' reference which is now shared between control
	 * message and `fdp'.
	 */

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required. We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}

int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof(int);

	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;	/* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

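	/*
	 * Convert the ints to struct fdpass in place, walking from the
	 * last element backwards: each fdpass is wider than an int, so
	 * a front-to-back pass would overwrite fds not yet read.
	 */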
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
#if NKCOV > 0
		/* kcov descriptors cannot be copied */
		if (fp->f_type == DTYPE_VNODE && kcov_vnode(fp->f_data)) {
			error = EINVAL;
			goto fail;
		}
#endif
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}

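/*
 * Garbage-collect sockets that are referenced only by SCM_RIGHTS
 * messages sitting in socket buffers (e.g. a socket passed over
 * itself and then closed).  This is a mark-and-sweep: first close
 * everything queued by unp_discard(), then mark sockets whose file
 * reference count is fully accounted for by in-flight messages as
 * dead, unmark anything still reachable from a live socket, and
 * finally flush the receive buffers of the remaining dead ones.
 */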
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_gc_lock);
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	nunref = 0;

	rw_enter_write(&unp_gc_lock);

	/*
	 * Determine sockets which may be prospectively dead. Such
	 * sockets have their `unp_msgcount' equal to the `f_count'.
	 * If `unp_msgcount' is 0, the socket has not been passed
	 * and can't be unreferenced.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		unp->unp_gcflags = 0;

		if (unp->unp_msgcount == 0)
			continue;
		if ((fp = unp->unp_file) == NULL)
			continue;
		if (fp->f_count == unp->unp_msgcount) {
			unp->unp_gcflags |= UNP_GCDEAD;
			unp->unp_gcrefs = unp->unp_msgcount;
			nunref++;
		}
	}

	/*
	 * Scan all sockets previously marked as dead. Remove
	 * the `unp_gcrefs' reference each socket holds on any
	 * dead socket in its buffer.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
			continue;
		so = unp->unp_socket;
		mtx_enter(&so->so_rcv.sb_mtx);
		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
		mtx_leave(&so->so_rcv.sb_mtx);
	}

	/*
	 * If the dead socket has `unp_gcrefs' reference counter
	 * greater than 0, it can't be unreferenced. Mark it as
	 * alive and increment the `unp_gcrefs' reference for each
	 * dead socket within its buffer. Repeat this until we
	 * have no new alive sockets found.
	 */
	do {
		unp_defer = 0;

		LIST_FOREACH(unp, &unp_head, unp_link) {
			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
				continue;
			if (unp->unp_gcrefs == 0)
				continue;

			unp->unp_gcflags &= ~UNP_GCDEAD;

			so = unp->unp_socket;
			mtx_enter(&so->so_rcv.sb_mtx);
			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
			mtx_leave(&so->so_rcv.sb_mtx);

			KASSERT(nunref > 0);
			nunref--;
		}
	} while (unp_defer > 0);

	/*
	 * If there are any unreferenced sockets, dispose of the files
	 * in each one's receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				struct sockbuf *sb = &unp->unp_socket->so_rcv;
				struct mbuf *m;

				/*
				 * This socket could still be connected,
				 * in which case its `so_rcv' is still
				 * accessible to a concurrent PRU_SEND
				 * thread.
				 */

				mtx_enter(&sb->sb_mtx);
				m = sb->sb_mb;
				memset(&sb->sb_startzero, 0,
				    (caddr_t)&sb->sb_endzero -
				    (caddr_t)&sb->sb_startzero);
				sb->sb_timeo_nsecs = INFSLP;
				mtx_leave(&sb->sb_mtx);

				unp_scan(m, unp_discard);
				m_purge(m);
			}
		}
	}

	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}

void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm)))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

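/*
 * Stash discarded rights for the GC task instead of closing them
 * here: callers such as unp_dispose() can run with socket locks
 * held, where calling closef(9) directly would be unsafe, so the
 * file pointers are parked on `unp_deferred' and the actual close
 * happens later in unp_gc().
 */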
void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);

	rw_enter_write(&unp_df_lock);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
	rw_exit_write(&unp_df_lock);

	task_add(systqmp, &unp_gc_task);
}

void
unp_remove_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			KASSERT(unp->unp_gcrefs > 0);
			unp->unp_gcrefs--;
		}
	}
}

void
unp_restore_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			unp->unp_gcrefs++;
			unp_defer++;
		}
	}
}

int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}
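
/*
 * A minimal sketch of the address layout unp_nam2sun() accepts, as a
 * typical bind(2)/connect(2) caller would build it (illustrative):
 *
 *	struct sockaddr_un sun = {
 *		.sun_len = sizeof(sun),
 *		.sun_family = AF_UNIX,
 *		.sun_path = "/tmp/example.sock",
 *	};
 *
 * A path filling sun_path completely is rejected (no room for the
 * terminating NUL); a path that merely lacks NUL termination within
 * the given length gets one appended in place when the mbuf has
 * trailing space.
 */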