/*	$OpenBSD: uipc_usrreq.c,v 1.206 2024/05/03 17:43:09 mvs Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/refcnt.h>

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global data and struct members:
 *	I	immutable after creation
 *	D	unp_df_lock
 *	G	unp_gc_lock
 *	M	unp_ino_mtx
 *	R	unp_rights_mtx
 *	a	atomic
 *	s	socket lock
 */

struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");

struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] */
};

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_remove_gcrefs(struct fdpass *, int);
void	unp_restore_gcrefs(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
static inline void unp_ref(struct unpcb *);
static inline void unp_rele(struct unpcb *);
struct socket *unp_solock_peer(struct socket *);

struct pool unpcb_pool;
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [G] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
	LIST_HEAD_INITIALIZER(unp_head);
/* [D] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
	SLIST_HEAD_INITIALIZER(unp_deferred);
ino_t	unp_ino;	/* [M] prototype for fake inode numbers */
int	unp_rights;	/* [R] file descriptors in flight */
int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [G] GC task currently running */

const struct pr_usrreqs uipc_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_accept	= uipc_accept,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_shutdown,
	.pru_rcvd	= uipc_rcvd,
	.pru_send	= uipc_send,
	.pru_abort	= uipc_abort,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

const struct pr_usrreqs uipc_dgram_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_dgram_shutdown,
	.pru_send	= uipc_dgram_send,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

static inline void
unp_ref(struct unpcb *unp)
{
	refcnt_take(&unp->unp_refcnt);
}

static inline void
unp_rele(struct unpcb *unp)
{
	refcnt_rele_wake(&unp->unp_refcnt);
}
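
/*
 * Lock the socket of our connected peer.  Sockets are locked in
 * address order to avoid deadlock: a peer that sorts after `so'
 * can be locked directly; otherwise `so' is dropped and both are
 * re-locked in the safe order.  Because `so' is briefly unlocked,
 * a datagram peer may disconnect or reconnect concurrently; the
 * reference taken on `unp2' keeps it from being freed, and the
 * `unp_conn' re-check restarts the dance if the peer changed.
 */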

struct socket *
unp_solock_peer(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;

	unp = so->so_pcb;

again:
	if ((unp2 = unp->unp_conn) == NULL)
		return NULL;

	so2 = unp2->unp_socket;

	if (so < so2)
		solock(so2);
	else if (so > so2) {
		unp_ref(unp2);
		sounlock(so);
		solock(so2);
		solock(so);

		/* Datagram socket could be reconnected due to re-lock. */
		if (unp->unp_conn != unp2) {
			sounlock(so2);
			unp_rele(unp2);
			goto again;
		}

		unp_rele(unp2);
	}

	return so2;
}

void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};
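
/*
 * The limits above are exported via sysctl(2); on a stock kernel
 * they should appear as net.unix.{stream,seqpacket,dgram}.
 * {recvspace,sendspace} together with the read-only counters
 * net.unix.inflight and net.unix.deferred (illustrative usage;
 * names assumed from the MIB layout handled in uipc_sysctl()):
 *
 *	# sysctl net.unix.stream.recvspace=16384
 *	$ sysctl net.unix.inflight
 */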

int
uipc_attach(struct socket *so, int proto, int wait)
{
	struct unpcb *unp;
	int error;

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	refcnt_init(&unp->unp_refcnt);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	return (0);
}

int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

int
uipc_bind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `solock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket);

	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_CREATE;

	KERNEL_LOCK();
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

int
uipc_listen(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode == NULL)
		return (EINVAL);
	return (0);
}

int
uipc_connect(struct socket *so, struct mbuf *nam)
{
	return unp_connect(so, nam, curproc);
}

int
uipc_accept(struct socket *so, struct mbuf *nam)
{
	struct socket *so2;
	struct unpcb *unp = sotounpcb(so);

	/*
	 * Pass back name of connected socket, if it was bound and
	 * we are still connected (our peer may have closed already!).
	 */
	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);

	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_disconnect(unp);
	return (0);
}

int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	socantsendmore(so);

	if (unp->unp_conn != NULL) {
		so2 = unp->unp_conn->unp_socket;
		socantrcvmore(so2);
	}

	return (0);
}

int
uipc_dgram_shutdown(struct socket *so)
{
	socantsendmore(so);
	return (0);
}

void
uipc_rcvd(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp->unp_conn == NULL)
		return;
	so2 = unp->unp_conn->unp_socket;

	/*
	 * Adjust backpressure on sender
	 * and wakeup any waiting to write.
	 */
	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&so2->so_snd.sb_mtx);
	so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
	so2->so_snd.sb_cc = so->so_rcv.sb_cc;
	mtx_leave(&so2->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);
	sowwakeup(so2);
}
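
/*
 * Stream and seqpacket data is appended directly to the peer's
 * receive buffer; the local send buffer holds no mbufs.  Backpressure
 * is created by mirroring the peer's `sb_cc' and `sb_mbcnt' into the
 * sender's `so_snd': uipc_send() below raises the mirrored counts
 * after appending, and uipc_rcvd() above lowers them once the reader
 * has drained data, waking any blocked writers.
 */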

int
uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int error = 0, dowakeup = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	if (unp->unp_conn == NULL) {
		error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	/*
	 * Send to paired receive port, and then raise
	 * send buffer counts to maintain backpressure.
	 * Wake up readers.
	 */
	/*
	 * sbappend*() should be serialized together
	 * with so_snd modification.
	 */
	mtx_enter(&so2->so_rcv.sb_mtx);
	mtx_enter(&so->so_snd.sb_mtx);
	if (so->so_snd.sb_state & SS_CANTSENDMORE) {
		mtx_leave(&so->so_snd.sb_mtx);
		mtx_leave(&so2->so_rcv.sb_mtx);
		error = EPIPE;
		goto dispose;
	}
	if (control) {
		if (sbappendcontrol(so2, &so2->so_rcv, m, control)) {
			control = NULL;
		} else {
			mtx_leave(&so->so_snd.sb_mtx);
			mtx_leave(&so2->so_rcv.sb_mtx);
			error = ENOBUFS;
			goto dispose;
		}
	} else if (so->so_type == SOCK_SEQPACKET)
		sbappendrecord(so2, &so2->so_rcv, m);
	else
		sbappend(so2, &so2->so_rcv, m);
	so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
	so->so_snd.sb_cc = so2->so_rcv.sb_cc;
	if (so2->so_rcv.sb_cc > 0)
		dowakeup = 1;
	mtx_leave(&so->so_snd.sb_mtx);
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);

	m = NULL;

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

int
uipc_dgram_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	const struct sockaddr *from;
	int error = 0, dowakeup = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	if (nam) {
		if (unp->unp_conn) {
			error = EISCONN;
			goto dispose;
		}
		error = unp_connect(so, nam, curproc);
		if (error)
			goto dispose;
	}

	if (unp->unp_conn == NULL) {
		if (nam != NULL)
			error = ECONNREFUSED;
		else
			error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	if (unp->unp_addr)
		from = mtod(unp->unp_addr, struct sockaddr *);
	else
		from = &sun_noname;

	mtx_enter(&so2->so_rcv.sb_mtx);
	if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
		dowakeup = 1;
		m = NULL;
		control = NULL;
	} else
		error = ENOBUFS;
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);
	if (nam)
		unp_disconnect(unp);

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

void
uipc_abort(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_detach(unp);
	sofree(so, 0);
}

int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp = sotounpcb(so);

	sb->st_blksize = so->so_snd.sb_hiwat;
	sb->st_dev = NODEV;
	mtx_enter(&unp_ino_mtx);
	if (unp->unp_ino == 0)
		unp->unp_ino = unp_ino++;
	mtx_leave(&unp_ino_mtx);
	sb->st_atim.tv_sec =
	    sb->st_mtim.tv_sec =
	    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
	sb->st_atim.tv_nsec =
	    sb->st_mtim.tv_nsec =
	    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
	sb->st_ino = unp->unp_ino;

	return (0);
}

int
uipc_sockaddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);

	uipc_setaddr(unp, nam);
	return (0);
}

int
uipc_peeraddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);
	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so), *unp2;
	int error;

	if ((error = unp_connect2(so, so2)))
		return (error);

	unp->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp->unp_connid.pid = curproc->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDS;
	unp2 = sotounpcb(so2);
	unp2->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp2->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp2->unp_connid.pid = curproc->p_p->ps_pid;
	unp2->unp_flags |= UNP_FEIDS;

	return (0);
}

int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}

void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;
	struct unpcb *unp2;

	unp->unp_vnode = NULL;

	/*
	 * Enforce `i_lock' -> `solock()' lock order.
	 */
	sounlock(so);

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
	}

	solock(so);

	if (unp->unp_conn != NULL) {
		/*
		 * A datagram socket could be connected to itself.
		 * Such a socket is disconnected here.
		 */
		unp_disconnect(unp);
	}

	while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) {
		struct socket *so2 = unp2->unp_socket;

		if (so < so2)
			solock(so2);
		else {
			unp_ref(unp2);
			sounlock(so);
			solock(so2);
			solock(so);

			if (unp2->unp_conn != unp) {
				/* `unp2' was disconnected due to re-lock. */
				sounlock(so2);
				unp_rele(unp2);
				continue;
			}

			unp_rele(unp2);
		}

		unp2->unp_conn = NULL;
		SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref);
		so2->so_error = ECONNRESET;
		so2->so_state &= ~SS_ISCONNECTED;

		sounlock(so2);
	}

	sounlock(so);
	refcnt_finalize(&unp->unp_refcnt, "unpfinal");
	solock(so);

	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}
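
/*
 * Connect `so' to the socket bound at a pathname: namei() resolves
 * the name to a VSOCK vnode published by uipc_bind(), and for
 * connection-oriented sockets sonewconn() spawns the server side
 * before unp_connect2() wires the pair together.  The socket lock
 * is dropped around the lookup to respect the `i_lock' -> `solock'
 * order noted above.
 */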

int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_WRITE;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `solock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		solock(so2);

		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, M_WAIT)) == NULL) {
			error = ECONNREFUSED;
		}

		sounlock(so2);

		if (error != 0)
			goto put;

		/*
		 * Since `so2' is protected by vnode(9) lock, `so3'
		 * can't be PRU_ABORT'ed here.
		 */
		solock_pair(so, so3);

		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);

		/*
		 * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag
		 * are immutable since we set them in uipc_bind().
		 */
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;

		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}

		so2 = so3;
	} else {
		if (so2 != so)
			solock_pair(so, so2);
		else
			solock(so);
	}

	error = unp_connect2(so, so2);

	sounlock(so);

	/*
	 * `so2' can't be PRU_ABORT'ed concurrently
	 */
	if (so2 != so)
		sounlock(so2);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by a concurrent thread
	 * while `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}

int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	soassertlocked(so);
	soassertlocked(so2);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

void
unp_disconnect(struct unpcb *unp)
{
	struct socket *so2;
	struct unpcb *unp2;

	if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
		return;

	unp2 = unp->unp_conn;
	unp->unp_conn = NULL;

	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}

	if (so2 != unp->unp_socket)
		sounlock(so2);
}

static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient is allowed to see the descriptors. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.  We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Keep `fdp' locked to prevent concurrent close() of just
	 * inserted descriptors. Such descriptors could have the only
	 * `f_count' reference which is now shared between control
	 * message and `fdp'.
	 */

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required. We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}
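
/*
 * unp_internalize() below is the sending-side twin of
 * unp_externalize(): each int descriptor in an SCM_RIGHTS control
 * message is converted into a struct fdpass holding the file
 * pointer, and converted back into a fresh descriptor on receive.
 * For reference, a minimal userspace sender sketch (not part of
 * this file; error handling omitted) looks like:
 *
 *	struct msghdr msg = { 0 };
 *	union {
 *		struct cmsghdr hdr;
 *		unsigned char buf[CMSG_SPACE(sizeof(int))];
 *	} cmsgbuf;
 *	struct cmsghdr *cmsg;
 *
 *	msg.msg_control = cmsgbuf.buf;
 *	msg.msg_controllen = sizeof(cmsgbuf.buf);
 *	cmsg = CMSG_FIRSTHDR(&msg);
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type = SCM_RIGHTS;
 *	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
 *	sendmsg(s, &msg, 0);
 */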

int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;	/* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
#if NKCOV > 0
		/* kcov descriptors cannot be copied */
		if (fp->f_type == DTYPE_VNODE && kcov_vnode(fp->f_data)) {
			error = EINVAL;
			goto fail;
		}
#endif
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}
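
/*
 * Garbage collector for descriptors stuck in flight.  Deferred sets
 * of files are closed first, then a mark-and-sweep pass runs over
 * all unpcbs: a socket whose file is referenced only by in-flight
 * messages (`f_count' == `unp_msgcount') is marked prospectively
 * dead, references reachable from receive buffers are subtracted,
 * sockets still referenced from outside are revived transitively,
 * and whatever remains dead has its receive buffer flushed so the
 * cycle's files can finally be closed.
 */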

void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_gc_lock);
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	nunref = 0;

	rw_enter_write(&unp_gc_lock);

	/*
	 * Determine sockets which may be prospectively dead. Such
	 * sockets have their `unp_msgcount' equal to the `f_count'.
	 * If `unp_msgcount' is 0, the socket has not been passed
	 * and can't be unreferenced.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		unp->unp_gcflags = 0;

		if (unp->unp_msgcount == 0)
			continue;
		if ((fp = unp->unp_file) == NULL)
			continue;
		if (fp->f_count == unp->unp_msgcount) {
			unp->unp_gcflags |= UNP_GCDEAD;
			unp->unp_gcrefs = unp->unp_msgcount;
			nunref++;
		}
	}

	/*
	 * Scan all sockets previously marked as dead. Remove
	 * the `unp_gcrefs' reference each socket holds on any
	 * dead socket in its buffer.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
			continue;
		so = unp->unp_socket;
		mtx_enter(&so->so_rcv.sb_mtx);
		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
		mtx_leave(&so->so_rcv.sb_mtx);
	}

	/*
	 * A dead socket whose `unp_gcrefs' counter is still greater
	 * than 0 is referenced from outside and can't be unreferenced.
	 * Mark it as alive and increment the `unp_gcrefs' counter of
	 * each dead socket within its buffer.  Repeat until no new
	 * live sockets are found.
	 */
	do {
		unp_defer = 0;

		LIST_FOREACH(unp, &unp_head, unp_link) {
			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
				continue;
			if (unp->unp_gcrefs == 0)
				continue;

			unp->unp_gcflags &= ~UNP_GCDEAD;

			so = unp->unp_socket;
			mtx_enter(&so->so_rcv.sb_mtx);
			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
			mtx_leave(&so->so_rcv.sb_mtx);

			KASSERT(nunref > 0);
			nunref--;
		}
	} while (unp_defer > 0);

	/*
	 * If any unreferenced sockets remain, dispose of the files in
	 * each one's receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				struct sockbuf *sb = &unp->unp_socket->so_rcv;
				struct mbuf *m;

				/*
				 * This socket could still be connected
				 * and if so its `so_rcv' is still
				 * accessible by a concurrent PRU_SEND
				 * thread.
				 */

				mtx_enter(&sb->sb_mtx);
				m = sb->sb_mb;
				memset(&sb->sb_startzero, 0,
				    (caddr_t)&sb->sb_endzero -
				    (caddr_t)&sb->sb_startzero);
				sb->sb_timeo_nsecs = INFSLP;
				mtx_leave(&sb->sb_mtx);

				unp_scan(m, unp_discard);
				m_purge(m);
			}
		}
	}

	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}

void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);

	rw_enter_write(&unp_df_lock);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
	rw_exit_write(&unp_df_lock);

	task_add(systqmp, &unp_gc_task);
}

void
unp_remove_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			KASSERT(unp->unp_gcrefs > 0);
			unp->unp_gcrefs--;
		}
	}
}

void
unp_restore_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			unp->unp_gcrefs++;
			unp_defer++;
		}
	}
}

int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}