xref: /openbsd/sys/kern/uipc_usrreq.c (revision e2bf3321)
1 /*	$OpenBSD: uipc_usrreq.c,v 1.214 2025/01/25 22:06:41 bluhm Exp $	*/
2 /*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/filedesc.h>
39 #include <sys/domain.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/unpcb.h>
45 #include <sys/un.h>
46 #include <sys/namei.h>
47 #include <sys/vnode.h>
48 #include <sys/file.h>
49 #include <sys/stat.h>
50 #include <sys/mbuf.h>
51 #include <sys/task.h>
52 #include <sys/pledge.h>
53 #include <sys/pool.h>
54 #include <sys/rwlock.h>
55 #include <sys/mutex.h>
56 #include <sys/sysctl.h>
57 #include <sys/lock.h>
58 #include <sys/refcnt.h>
59 
60 #include "kcov.h"
61 #if NKCOV > 0
62 #include <sys/kcov.h>
63 #endif
64 
65 /*
66  * Locks used to protect global data and struct members:
67  *      I       immutable after creation
68  *      D       unp_df_lock
69  *      G       unp_gc_lock
70  *      M       unp_ino_mtx
71  *      R       unp_rights_mtx
72  *      a       atomic
73  *      s       socket lock
74  */
75 
76 struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
77 struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");
78 
79 struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
80 struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
81 
82 /*
83  * Stack of sets of files that were passed over a socket but were
84  * not received and need to be closed.
85  */
86 struct	unp_deferral {
87 	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
88 	int				ud_n;		/* [I] */
89 	/* followed by ud_n struct fdpass */
90 	struct fdpass			ud_fp[];	/* [I] */
91 };
92 
93 void	uipc_setaddr(const struct unpcb *, struct mbuf *);
94 void	unp_discard(struct fdpass *, int);
95 void	unp_remove_gcrefs(struct fdpass *, int);
96 void	unp_restore_gcrefs(struct fdpass *, int);
97 void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
98 int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
99 static inline void unp_ref(struct unpcb *);
100 static inline void unp_rele(struct unpcb *);
101 struct socket *unp_solock_peer(struct socket *);
102 
103 struct pool unpcb_pool;
104 struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);
105 
106 /*
107  * Unix communications domain.
108  *
109  * TODO:
110  *	RDM
111  *	rethink name space problems
112  *	need a proper out-of-band
113  */
114 const struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
115 
116 /* [G] list of all UNIX domain sockets, for unp_gc() */
117 LIST_HEAD(unp_head, unpcb)	unp_head =
118 	LIST_HEAD_INITIALIZER(unp_head);
119 /* [D] list of sets of files that were sent over sockets that are now closed */
120 SLIST_HEAD(,unp_deferral)	unp_deferred =
121 	SLIST_HEAD_INITIALIZER(unp_deferred);
122 
123 ino_t	unp_ino;	/* [U] prototype for fake inode numbers */
124 int	unp_rights;	/* [R] file descriptors in flight */
125 int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
126 int	unp_gcing;	/* [G] GC task currently running */
127 
128 const struct pr_usrreqs uipc_usrreqs = {
129 	.pru_attach	= uipc_attach,
130 	.pru_detach	= uipc_detach,
131 	.pru_bind	= uipc_bind,
132 	.pru_listen	= uipc_listen,
133 	.pru_connect	= uipc_connect,
134 	.pru_accept	= uipc_accept,
135 	.pru_disconnect	= uipc_disconnect,
136 	.pru_shutdown	= uipc_shutdown,
137 	.pru_rcvd	= uipc_rcvd,
138 	.pru_send	= uipc_send,
139 	.pru_abort	= uipc_abort,
140 	.pru_sense	= uipc_sense,
141 	.pru_sockaddr	= uipc_sockaddr,
142 	.pru_peeraddr	= uipc_peeraddr,
143 	.pru_connect2	= uipc_connect2,
144 };
145 
146 const struct pr_usrreqs uipc_dgram_usrreqs = {
147 	.pru_attach	= uipc_attach,
148 	.pru_detach	= uipc_detach,
149 	.pru_bind	= uipc_bind,
150 	.pru_listen	= uipc_listen,
151 	.pru_connect	= uipc_connect,
152 	.pru_disconnect	= uipc_disconnect,
153 	.pru_shutdown	= uipc_dgram_shutdown,
154 	.pru_send	= uipc_dgram_send,
155 	.pru_sense	= uipc_sense,
156 	.pru_sockaddr	= uipc_sockaddr,
157 	.pru_peeraddr	= uipc_peeraddr,
158 	.pru_connect2	= uipc_connect2,
159 };
160 
/*
 * One-time initialization of the AF_UNIX protocol: set up the pool
 * backing all unpcb allocations.  IPL_SOFTNET matches the interrupt
 * level of the mutexes protecting the global unpcb state.
 */
void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}
167 
/*
 * Take a reference on `unp' so it survives a temporary release of
 * its socket lock (see unp_solock_peer() and unp_detach()).
 */
static inline void
unp_ref(struct unpcb *unp)
{
	refcnt_take(&unp->unp_refcnt);
}
173 
/*
 * Drop a reference taken with unp_ref(); wakes up a thread sleeping
 * in refcnt_finalize() once the last reference goes away.
 */
static inline void
unp_rele(struct unpcb *unp)
{
	refcnt_rele_wake(&unp->unp_refcnt);
}
179 
/*
 * Lock the peer of an already locked socket `so'.  Returns the locked
 * peer socket, or NULL if `so' has no connection.  Sockets are always
 * locked in ascending address order to avoid deadlock; when the peer
 * sorts below `so', both locks are dropped and re-taken in order,
 * with a reference held on the peer pcb so it cannot be freed while
 * `so' is unlocked.  A self-connected socket needs no second lock.
 */
struct socket *
unp_solock_peer(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;

	unp = so->so_pcb;

again:
	if ((unp2 = unp->unp_conn) == NULL)
		return NULL;

	so2 = unp2->unp_socket;

	if (so < so2)
		solock(so2);
	else if (so > so2) {
		/* Keep `unp2' alive across the unlock window. */
		unp_ref(unp2);
		sounlock(so);
		solock(so2);
		solock(so);

		/* Datagram socket could be reconnected due to re-lock. */
		if (unp->unp_conn != unp2) {
			sounlock(so2);
			unp_rele(unp2);
			goto again;
		}

		unp_rele(unp2);
	}

	return so2;
}
214 
/*
 * Copy the bound address of `unp' into the caller-supplied SONAME
 * mbuf `nam'.  If the pcb is gone or was never bound, hand back the
 * generic AF_UNIX "no name" address instead.
 */
void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}
228 
229 /*
230  * Both send and receive buffers are allocated PIPSIZ bytes of buffering
231  * for stream sockets, although the total for sender and receiver is
232  * actually only PIPSIZ.
233  * Datagram sockets really use the sendspace as the maximum datagram size,
234  * and don't really want to reserve the sendspace.  Their recvspace should
235  * be large enough for at least one max-size datagram plus address.
236  */
237 #define	PIPSIZ	32768
238 u_int	unpst_sendspace = PIPSIZ;	/* [a] */
239 u_int	unpst_recvspace = PIPSIZ;	/* [a] */
240 u_int	unpsq_sendspace = PIPSIZ;	/* [a] */
241 u_int	unpsq_recvspace = PIPSIZ;	/* [a] */
242 u_int	unpdg_sendspace = 8192;		/* [a] really max datagram size */
243 u_int	unpdg_recvspace = PIPSIZ;	/* [a] */
244 
245 const struct sysctl_bounded_args unpstctl_vars[] = {
246 	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
247 	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
248 };
249 const struct sysctl_bounded_args unpsqctl_vars[] = {
250 	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
251 	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
252 };
253 const struct sysctl_bounded_args unpdgctl_vars[] = {
254 	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
255 	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
256 };
257 
/*
 * Attach a new AF_UNIX pcb to socket `so': reserve send/receive
 * buffer space according to the socket type, allocate and link the
 * unpcb, and put it on the global list scanned by the GC task.
 */
int
uipc_attach(struct socket *so, int proto, int wait)
{
	struct unpcb *unp;
	int error;

	if (so->so_pcb)
		return EISCONN;
	/* Only reserve space if the caller did not already do so. */
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so,
			    atomic_load_int(&unpst_sendspace),
			    atomic_load_int(&unpst_recvspace));
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so,
			    atomic_load_int(&unpsq_sendspace),
			    atomic_load_int(&unpsq_recvspace));
			break;

		case SOCK_DGRAM:
			error = soreserve(so,
			    atomic_load_int(&unpdg_sendspace),
			    atomic_load_int(&unpdg_recvspace));
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	refcnt_init(&unp->unp_refcnt);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	/* Make the pcb visible to unp_gc(). */
	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	return (0);
}
308 
309 int
uipc_detach(struct socket * so)310 uipc_detach(struct socket *so)
311 {
312 	struct unpcb *unp = sotounpcb(so);
313 
314 	if (unp == NULL)
315 		return (EINVAL);
316 
317 	unp_detach(unp);
318 
319 	return (0);
320 }
321 
/*
 * Bind socket `so' to the filesystem path in `nam': create a VSOCK
 * vnode at that path and cross-link it with the socket.  Fails if a
 * bind or connect is already in progress, the socket is already
 * bound, or a node already exists at the path (EADDRINUSE).  The
 * UNP_BINDING flag keeps concurrent bind/connect attempts out while
 * the socket lock is dropped for the filesystem work.
 */
int
uipc_bind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `solock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket);

	/* Take a private, zeroed copy of the address for unp_addr. */
	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_CREATE;

	KERNEL_LOCK();
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* A node already exists at the path: address in use. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	vattr_null(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	/* Cross-link vnode and socket; record binder credentials. */
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}
414 
415 int
uipc_listen(struct socket * so)416 uipc_listen(struct socket *so)
417 {
418 	struct unpcb *unp = sotounpcb(so);
419 
420 	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
421 		return (EINVAL);
422 	if (unp->unp_vnode == NULL)
423 		return (EINVAL);
424 	return (0);
425 }
426 
/*
 * connect(2) entry point: delegate to unp_connect() on behalf of the
 * current process.
 */
int
uipc_connect(struct socket *so, struct mbuf *nam)
{
	return unp_connect(so, nam, curproc);
}
432 
/*
 * Fill in `nam' with the peer's address for accept(2).  The peer is
 * locked via unp_solock_peer() while its address is copied; if the
 * peer already went away, uipc_setaddr() falls back to sun_noname.
 */
int
uipc_accept(struct socket *so, struct mbuf *nam)
{
	struct socket *so2;
	struct unpcb *unp = sotounpcb(so);

	/*
	 * Pass back name of connected socket, if it was bound and
	 * we are still connected (our peer may have closed already!).
	 */
	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);

	/* Self-connected sockets hold only one lock; don't unlock twice. */
	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}
450 
/*
 * disconnect(2) entry point: sever the connection of `so', if any.
 */
int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_disconnect(unp);
	return (0);
}
459 
/*
 * shutdown(2) for connection-oriented sockets: stop our send side
 * and, if still connected, stop the peer's receive side so readers
 * there see EOF.
 */
int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	socantsendmore(so);

	if (unp->unp_conn != NULL) {
		so2 = unp->unp_conn->unp_socket;
		socantrcvmore(so2);
	}

	return (0);
}
475 
/*
 * shutdown(2) for datagram sockets: only our own send side is
 * affected; a datagram peer may have many senders.
 */
int
uipc_dgram_shutdown(struct socket *so)
{
	socantsendmore(so);
	return (0);
}
482 
/*
 * Receiver consumed data: mirror our receive buffer usage into the
 * peer's send buffer so the sender's flow control reflects how much
 * room is left, then wake up writers blocked on the peer.
 */
void
uipc_rcvd(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp->unp_conn == NULL)
		return;
	so2 = unp->unp_conn->unp_socket;

	/*
	 * Adjust backpressure on sender
	 * and wakeup any waiting to write.
	 */
	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&so2->so_snd.sb_mtx);
	so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
	so2->so_snd.sb_cc = so->so_rcv.sb_cc;
	mtx_leave(&so2->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);
	sowwakeup(so2);
}
505 
/*
 * Send on a connection-oriented AF_UNIX socket: append `m' (and
 * optional SCM_RIGHTS `control') directly to the peer's receive
 * buffer, then copy the peer's receive counters back into our send
 * buffer for flow control.  On any error after unp_internalize(),
 * the internalized file references must be undone via unp_dispose()
 * before the control mbuf is freed.
 */
int
uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int error = 0, dowakeup = 0;

	if (control) {
		/* unp_internalize() needs the socket unlocked. */
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	/*
	 * We hold both solock() and `sb_mtx' mutex while modifying
	 * SS_CANTSENDMORE flag. solock() is enough to check it.
	 */
	if (so->so_snd.sb_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto dispose;
	}
	if (unp->unp_conn == NULL) {
		error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	/*
	 * Send to paired receive port, and then raise
	 * send buffer counts to maintain backpressure.
	 * Wake up readers.
	 */
	/*
	 * sbappend*() should be serialized together
	 * with so_snd modification.
	 */
	mtx_enter(&so2->so_rcv.sb_mtx);
	mtx_enter(&so->so_snd.sb_mtx);
	if (control) {
		if (sbappendcontrol(so2, &so2->so_rcv, m, control)) {
			/* Ownership moved to the receive buffer. */
			control = NULL;
		} else {
			mtx_leave(&so->so_snd.sb_mtx);
			mtx_leave(&so2->so_rcv.sb_mtx);
			error = ENOBUFS;
			goto dispose;
		}
	} else if (so->so_type == SOCK_SEQPACKET)
		sbappendrecord(so2, &so2->so_rcv, m);
	else
		sbappend(so2, &so2->so_rcv, m);
	so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
	so->so_snd.sb_cc = so2->so_rcv.sb_cc;
	if (so2->so_rcv.sb_cc > 0)
		dowakeup = 1;
	mtx_leave(&so->so_snd.sb_mtx);
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);

	/* `m' was consumed by sbappend*(); don't free it below. */
	m = NULL;

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}
584 
/*
 * Send a datagram: optionally perform an implicit connect to `nam',
 * append the datagram (with sender address and optional control) to
 * the peer's receive buffer, and disconnect again if the connect was
 * implicit.  As in uipc_send(), internalized file references are
 * released with unp_dispose() on error.
 */
int
uipc_dgram_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	const struct sockaddr *from;
	int error = 0, dowakeup = 0;

	if (control) {
		/* unp_internalize() needs the socket unlocked. */
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	if (nam) {
		/* sendto(2) on a connected socket is an error. */
		if (unp->unp_conn) {
			error = EISCONN;
			goto dispose;
		}
		error = unp_connect(so, nam, curproc);
		if (error)
			goto dispose;
	}

	if (unp->unp_conn == NULL) {
		if (nam != NULL)
			error = ECONNREFUSED;
		else
			error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	/* Tag the datagram with our bound address, if any. */
	if (unp->unp_addr)
		from = mtod(unp->unp_addr, struct sockaddr *);
	else
		from = &sun_noname;

	mtx_enter(&so2->so_rcv.sb_mtx);
	if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
		dowakeup = 1;
		/* Buffer took ownership of both mbuf chains. */
		m = NULL;
		control = NULL;
	} else
		error = ENOBUFS;
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);
	/* Undo the implicit connect done for sendto(2). */
	if (nam)
		unp_disconnect(unp);

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}
652 
/*
 * Abort the socket: tear down the pcb and free the socket itself.
 */
void
uipc_abort(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_detach(unp);
	sofree(so, 1);
}
661 
/*
 * fstat(2) on an AF_UNIX socket: report the send buffer high-water
 * mark as the block size, a lazily assigned fake inode number, and
 * the pcb creation time for all three timestamps.
 */
int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp = sotounpcb(so);

	sb->st_blksize = so->so_snd.sb_hiwat;
	sb->st_dev = NODEV;
	/* Assign a fake inode number on first fstat(2). */
	mtx_enter(&unp_ino_mtx);
	if (unp->unp_ino == 0)
		unp->unp_ino = unp_ino++;
	mtx_leave(&unp_ino_mtx);
	sb->st_atim.tv_sec =
	    sb->st_mtim.tv_sec =
	    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
	sb->st_atim.tv_nsec =
	    sb->st_mtim.tv_nsec =
	    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
	sb->st_ino = unp->unp_ino;

	return (0);
}
683 
/*
 * getsockname(2): copy this socket's own bound address (or the
 * "no name" address) into `nam'.
 */
int
uipc_sockaddr(struct socket *so, struct mbuf *nam)
{
	uipc_setaddr(sotounpcb(so), nam);
	return (0);
}
692 
/*
 * getpeername(2): copy the connected peer's address into `nam'.
 * The peer is locked while its address is read; a vanished peer
 * yields the "no name" address.
 */
int
uipc_peeraddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);
	/* Self-connected sockets hold only one lock; don't unlock twice. */
	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}
705 
706 int
uipc_connect2(struct socket * so,struct socket * so2)707 uipc_connect2(struct socket *so, struct socket *so2)
708 {
709 	struct unpcb *unp = sotounpcb(so), *unp2;
710 	int error;
711 
712 	if ((error = unp_connect2(so, so2)))
713 		return (error);
714 
715 	unp->unp_connid.uid = curproc->p_ucred->cr_uid;
716 	unp->unp_connid.gid = curproc->p_ucred->cr_gid;
717 	unp->unp_connid.pid = curproc->p_p->ps_pid;
718 	unp->unp_flags |= UNP_FEIDS;
719 	unp2 = sotounpcb(so2);
720 	unp2->unp_connid.uid = curproc->p_ucred->cr_uid;
721 	unp2->unp_connid.gid = curproc->p_ucred->cr_gid;
722 	unp2->unp_connid.pid = curproc->p_p->ps_pid;
723 	unp2->unp_flags |= UNP_FEIDS;
724 
725 	return (0);
726 }
727 
728 int
uipc_sysctl(int * name,u_int namelen,void * oldp,size_t * oldlenp,void * newp,size_t newlen)729 uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
730     size_t newlen)
731 {
732 	int *valp = &unp_defer;
733 
734 	/* All sysctl names at this level are terminal. */
735 	switch (name[0]) {
736 	case SOCK_STREAM:
737 		if (namelen != 2)
738 			return (ENOTDIR);
739 		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
740 		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
741 	case SOCK_SEQPACKET:
742 		if (namelen != 2)
743 			return (ENOTDIR);
744 		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
745 		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
746 	case SOCK_DGRAM:
747 		if (namelen != 2)
748 			return (ENOTDIR);
749 		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
750 		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
751 	case NET_UNIX_INFLIGHT:
752 		valp = &unp_rights;
753 		/* FALLTHROUGH */
754 	case NET_UNIX_DEFERRED:
755 		if (namelen != 1)
756 			return (ENOTDIR);
757 		return sysctl_rdint(oldp, oldlenp, newp, *valp);
758 	default:
759 		return (ENOPROTOOPT);
760 	}
761 }
762 
/*
 * Tear down pcb `unp': remove it from the GC list, detach any bound
 * vnode, disconnect from the peer, reset every datagram sender still
 * pointing at us (with the same ordered re-lock dance as
 * unp_solock_peer()), wait for outstanding references, and free the
 * pcb.  Called with the socket locked.
 */
void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;
	struct unpcb *unp2;

	unp->unp_vnode = NULL;

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		/* Enforce `i_lock' -> solock() lock order. */
		sounlock(so);
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
		solock(so);
	}

	if (unp->unp_conn != NULL) {
		/*
		 * Datagram socket could be connected to itself.
		 * Such socket will be disconnected here.
		 */
		unp_disconnect(unp);
	}

	/* Reset every datagram sender still connected to us. */
	while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) {
		struct socket *so2 = unp2->unp_socket;

		/* Lock sockets in ascending address order. */
		if (so < so2)
			solock(so2);
		else {
			unp_ref(unp2);
			sounlock(so);
			solock(so2);
			solock(so);

			if (unp2->unp_conn != unp) {
				/* `unp2' was disconnected due to re-lock. */
				sounlock(so2);
				unp_rele(unp2);
				continue;
			}

			unp_rele(unp2);
		}

		unp2->unp_conn = NULL;
		SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref);
		so2->so_error = ECONNRESET;
		so2->so_state &= ~SS_ISCONNECTED;

		sounlock(so2);
	}

	/* Wait out transient references taken with unp_ref(). */
	sounlock(so);
	refcnt_finalize(&unp->unp_refcnt, "unpfinal");
	solock(so);

	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	/* Files may still be in flight; let the GC task reap them. */
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}
836 
/*
 * Connect socket `so' to the AF_UNIX socket bound at the path in
 * `nam'.  Looks up the VSOCK vnode, checks write access, and for
 * connection-oriented sockets spawns a fresh server-side socket via
 * sonewconn() on the listener and inherits its bind-time address and
 * credentials.  UNP_CONNECTING serializes against other bind/connect
 * attempts while the socket lock is dropped for the namei() work.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_WRITE;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `solock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	/* Connecting requires write permission on the socket node. */
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		solock(so2);

		/* Peer must be listening and able to spawn a new socket. */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, M_WAIT)) == NULL) {
			sounlock(so2);
			error = ECONNREFUSED;
			goto put;
		}

		/*
		 * Since `so2' is protected by vnode(9) lock, `so3'
		 * can't be PRU_ABORT'ed here.
		 */
		sounlock(so2);
		sounlock(so3);
		solock_pair(so, so3);

		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);

		/*
		 * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag
		 * are immutable since we set them in uipc_bind().
		 */
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;

		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}

		/* From here on, connect to the spawned server socket. */
		so2 = so3;
	} else {
		if (so2 != so)
			solock_pair(so, so2);
		else
			solock(so);
	}

	error = unp_connect2(so, so2);

	sounlock(so);

	/*
	 * `so2' can't be PRU_ABORT'ed concurrently
	 */
	if (so2 != so)
		sounlock(so2);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by concurrent thread
	 * when `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}
959 
/*
 * Link two already-locked sockets of matching type.  Datagram
 * connections are one-way: only `so' points at `so2', and `so' is
 * added to the peer's unp_refs list so it can be reset when the peer
 * goes away.  Stream/seqpacket connections are symmetric and mark
 * both ends connected.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	soassertlocked(so);
	soassertlocked(so2);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}
992 
/*
 * Undo unp_connect2(): with the peer locked via unp_solock_peer(),
 * clear the connection.  Datagram sockets are removed from the
 * peer's sender list; stream/seqpacket sockets have both ends'
 * flow-control counters reset and are marked disconnected.
 */
void
unp_disconnect(struct unpcb *unp)
{
	struct socket *so2;
	struct unpcb *unp2;

	if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
		return;

	unp2 = unp->unp_conn;
	unp->unp_conn = NULL;

	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}

	/* Self-connected sockets hold only one lock; don't unlock twice. */
	if (so2 != unp->unp_socket)
		sounlock(so2);
}
1027 
1028 static struct unpcb *
fptounp(struct file * fp)1029 fptounp(struct file *fp)
1030 {
1031 	struct socket *so;
1032 
1033 	if (fp->f_type != DTYPE_SOCKET)
1034 		return (NULL);
1035 	if ((so = fp->f_data) == NULL)
1036 		return (NULL);
1037 	if (so->so_proto->pr_domain != &unixdomain)
1038 		return (NULL);
1039 	return (sotounpcb(so));
1040 }
1041 
/*
 * Convert an SCM_RIGHTS control message from its in-kernel form (an
 * array of struct fdpass holding struct file pointers) into file
 * descriptors in the receiving process's table.  Checks pledge and
 * chroot visibility first, then allocates slots transactionally
 * (backing out on failure), drops the in-flight accounting, and
 * rewrites the message in place as an array of ints.  On error the
 * passed files are discarded via unp_discard().
 */
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	/* The receiver's buffer must have room for nfds ints. */
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient should be able to see the descriptors.. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.. We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Keep `fdp' locked to prevent concurrent close() of just
	 * inserted descriptors. Such descriptors could have the only
	 * `f_count' reference which is now shared between control
	 * message and `fdp'.
	 */

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		/* The file is no longer in flight on a socket. */
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
 out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required. We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}
1209 
/*
 * Internalize an SCM_RIGHTS control message: convert the sender's array
 * of file descriptor numbers into an array of struct fdpass holding
 * referenced struct file pointers, rewriting the control mbuf in place.
 * On success the in-flight descriptors are accounted in `unp_rights' and
 * each passed unix socket's `unp_msgcount' is bumped.  Returns 0 or errno.
 */
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/*
	 * Reserve the in-flight slots up front; the global cap limits how
	 * many files may be stuck in transit at once.  On any later
	 * failure this reservation is released at `nospace'.
	 */
	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;       /* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	/*
	 * struct fdpass is larger than an int, so the conversion is done
	 * in place from the last element to the first, to avoid clobbering
	 * descriptor numbers that have not been read yet.
	 */
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		/* refuse files whose reference count is already at the cap */
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
#if NKCOV > 0
		/* kcov descriptors cannot be copied */
		if (fp->f_type == DTYPE_VNODE && kcov_vnode(fp->f_data)) {
			error = EINVAL;
			goto fail;
		}
#endif
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	/*
	 * `fp' still holds the fd_getfile() reference for iteration `i';
	 * it was never stored into the message, so release it here.
	 */
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did: `rp' points one below the last slot
	 * filled, and `i' counts how many slots were filled. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	/* release the `unp_rights' reservation taken above */
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}
1341 
1342 void
unp_gc(void * arg __unused)1343 unp_gc(void *arg __unused)
1344 {
1345 	struct unp_deferral *defer;
1346 	struct file *fp;
1347 	struct socket *so;
1348 	struct unpcb *unp;
1349 	int nunref, i;
1350 
1351 	rw_enter_write(&unp_gc_lock);
1352 	if (unp_gcing)
1353 		goto unlock;
1354 	unp_gcing = 1;
1355 	rw_exit_write(&unp_gc_lock);
1356 
1357 	rw_enter_write(&unp_df_lock);
1358 	/* close any fds on the deferred list */
1359 	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
1360 		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
1361 		rw_exit_write(&unp_df_lock);
1362 		for (i = 0; i < defer->ud_n; i++) {
1363 			fp = defer->ud_fp[i].fp;
1364 			if (fp == NULL)
1365 				continue;
1366 			if ((unp = fptounp(fp)) != NULL) {
1367 				rw_enter_write(&unp_gc_lock);
1368 				unp->unp_msgcount--;
1369 				rw_exit_write(&unp_gc_lock);
1370 			}
1371 			mtx_enter(&unp_rights_mtx);
1372 			unp_rights--;
1373 			mtx_leave(&unp_rights_mtx);
1374 			 /* closef() expects a refcount of 2 */
1375 			FREF(fp);
1376 			(void) closef(fp, NULL);
1377 		}
1378 		free(defer, M_TEMP, sizeof(*defer) +
1379 		    sizeof(struct fdpass) * defer->ud_n);
1380 		rw_enter_write(&unp_df_lock);
1381 	}
1382 	rw_exit_write(&unp_df_lock);
1383 
1384 	nunref = 0;
1385 
1386 	rw_enter_write(&unp_gc_lock);
1387 
1388 	/*
1389 	 * Determine sockets which may be prospectively dead. Such
1390 	 * sockets have their `unp_msgcount' equal to the `f_count'.
1391 	 * If `unp_msgcount' is 0, the socket has not been passed
1392 	 * and can't be unreferenced.
1393 	 */
1394 	LIST_FOREACH(unp, &unp_head, unp_link) {
1395 		unp->unp_gcflags = 0;
1396 
1397 		if (unp->unp_msgcount == 0)
1398 			continue;
1399 		if ((fp = unp->unp_file) == NULL)
1400 			continue;
1401 		if (fp->f_count == unp->unp_msgcount) {
1402 			unp->unp_gcflags |= UNP_GCDEAD;
1403 			unp->unp_gcrefs = unp->unp_msgcount;
1404 			nunref++;
1405 		}
1406 	}
1407 
1408 	/*
1409 	 * Scan all sockets previously marked as dead. Remove
1410 	 * the `unp_gcrefs' reference each socket holds on any
1411 	 * dead socket in its buffer.
1412 	 */
1413 	LIST_FOREACH(unp, &unp_head, unp_link) {
1414 		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
1415 			continue;
1416 		so = unp->unp_socket;
1417 		mtx_enter(&so->so_rcv.sb_mtx);
1418 		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
1419 		mtx_leave(&so->so_rcv.sb_mtx);
1420 	}
1421 
1422 	/*
1423 	 * If the dead socket has `unp_gcrefs' reference counter
1424 	 * greater than 0, it can't be unreferenced. Mark it as
1425 	 * alive and increment the `unp_gcrefs' reference for each
1426 	 * dead socket within its buffer. Repeat this until we
1427 	 * have no new alive sockets found.
1428 	 */
1429 	do {
1430 		unp_defer = 0;
1431 
1432 		LIST_FOREACH(unp, &unp_head, unp_link) {
1433 			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
1434 				continue;
1435 			if (unp->unp_gcrefs == 0)
1436 				continue;
1437 
1438 			unp->unp_gcflags &= ~UNP_GCDEAD;
1439 
1440 			so = unp->unp_socket;
1441 			mtx_enter(&so->so_rcv.sb_mtx);
1442 			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
1443 			mtx_leave(&so->so_rcv.sb_mtx);
1444 
1445 			KASSERT(nunref > 0);
1446 			nunref--;
1447 		}
1448 	} while (unp_defer > 0);
1449 
1450 	/*
1451 	 * If there are any unreferenced sockets, then for each dispose
1452 	 * of files in its receive buffer and then close it.
1453 	 */
1454 	if (nunref) {
1455 		LIST_FOREACH(unp, &unp_head, unp_link) {
1456 			if (unp->unp_gcflags & UNP_GCDEAD) {
1457 				struct sockbuf *sb = &unp->unp_socket->so_rcv;
1458 				struct mbuf *m;
1459 
1460 				/*
1461 				 * This socket could still be connected
1462 				 * and if so it's `so_rcv' is still
1463 				 * accessible by concurrent PRU_SEND
1464 				 * thread.
1465 				 */
1466 
1467 				mtx_enter(&sb->sb_mtx);
1468 				m = sb->sb_mb;
1469 				memset(&sb->sb_startzero, 0,
1470 				    (caddr_t)&sb->sb_endzero -
1471 				    (caddr_t)&sb->sb_startzero);
1472 				sb->sb_timeo_nsecs = INFSLP;
1473 				mtx_leave(&sb->sb_mtx);
1474 
1475 				unp_scan(m, unp_discard);
1476 				m_purge(m);
1477 			}
1478 		}
1479 	}
1480 
1481 	unp_gcing = 0;
1482 unlock:
1483 	rw_exit_write(&unp_gc_lock);
1484 }
1485 
1486 void
unp_dispose(struct mbuf * m)1487 unp_dispose(struct mbuf *m)
1488 {
1489 
1490 	if (m)
1491 		unp_scan(m, unp_discard);
1492 }
1493 
1494 void
unp_scan(struct mbuf * m0,void (* op)(struct fdpass *,int))1495 unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
1496 {
1497 	struct mbuf *m;
1498 	struct fdpass *rp;
1499 	struct cmsghdr *cm;
1500 	int qfds;
1501 
1502 	while (m0) {
1503 		for (m = m0; m; m = m->m_next) {
1504 			if (m->m_type == MT_CONTROL &&
1505 			    m->m_len >= sizeof(*cm)) {
1506 				cm = mtod(m, struct cmsghdr *);
1507 				if (cm->cmsg_level != SOL_SOCKET ||
1508 				    cm->cmsg_type != SCM_RIGHTS)
1509 					continue;
1510 				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
1511 				    / sizeof(struct fdpass);
1512 				if (qfds > 0) {
1513 					rp = (struct fdpass *)CMSG_DATA(cm);
1514 					op(rp, qfds);
1515 				}
1516 				break;		/* XXX, but saves time */
1517 			}
1518 		}
1519 		m0 = m0->m_nextpkt;
1520 	}
1521 }
1522 
1523 void
unp_discard(struct fdpass * rp,int nfds)1524 unp_discard(struct fdpass *rp, int nfds)
1525 {
1526 	struct unp_deferral *defer;
1527 
1528 	/* copy the file pointers to a deferral structure */
1529 	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
1530 	defer->ud_n = nfds;
1531 	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
1532 	memset(rp, 0, sizeof(*rp) * nfds);
1533 
1534 	rw_enter_write(&unp_df_lock);
1535 	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
1536 	rw_exit_write(&unp_df_lock);
1537 
1538 	task_add(systqmp, &unp_gc_task);
1539 }
1540 
1541 void
unp_remove_gcrefs(struct fdpass * rp,int nfds)1542 unp_remove_gcrefs(struct fdpass *rp, int nfds)
1543 {
1544 	struct unpcb *unp;
1545 	int i;
1546 
1547 	rw_assert_wrlock(&unp_gc_lock);
1548 
1549 	for (i = 0; i < nfds; i++) {
1550 		if (rp[i].fp == NULL)
1551 			continue;
1552 		if ((unp = fptounp(rp[i].fp)) == NULL)
1553 			continue;
1554 		if (unp->unp_gcflags & UNP_GCDEAD) {
1555 			KASSERT(unp->unp_gcrefs > 0);
1556 			unp->unp_gcrefs--;
1557 		}
1558 	}
1559 }
1560 
1561 void
unp_restore_gcrefs(struct fdpass * rp,int nfds)1562 unp_restore_gcrefs(struct fdpass *rp, int nfds)
1563 {
1564 	struct unpcb *unp;
1565 	int i;
1566 
1567 	rw_assert_wrlock(&unp_gc_lock);
1568 
1569 	for (i = 0; i < nfds; i++) {
1570 		if (rp[i].fp == NULL)
1571 			continue;
1572 		if ((unp = fptounp(rp[i].fp)) == NULL)
1573 			continue;
1574 		if (unp->unp_gcflags & UNP_GCDEAD) {
1575 			unp->unp_gcrefs++;
1576 			unp_defer++;
1577 		}
1578 	}
1579 }
1580 
1581 int
unp_nam2sun(struct mbuf * nam,struct sockaddr_un ** sun,size_t * pathlen)1582 unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
1583 {
1584 	struct sockaddr *sa = mtod(nam, struct sockaddr *);
1585 	size_t size, len;
1586 
1587 	if (nam->m_len < offsetof(struct sockaddr, sa_data))
1588 		return EINVAL;
1589 	if (sa->sa_family != AF_UNIX)
1590 		return EAFNOSUPPORT;
1591 	if (sa->sa_len != nam->m_len)
1592 		return EINVAL;
1593 	if (sa->sa_len > sizeof(struct sockaddr_un))
1594 		return EINVAL;
1595 	*sun = (struct sockaddr_un *)sa;
1596 
1597 	/* ensure that sun_path is NUL terminated and fits */
1598 	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
1599 	len = strnlen((*sun)->sun_path, size);
1600 	if (len == sizeof((*sun)->sun_path))
1601 		return EINVAL;
1602 	if (len == size) {
1603 		if (m_trailingspace(nam) == 0)
1604 			return EINVAL;
1605 		nam->m_len++;
1606 		(*sun)->sun_len++;
1607 		(*sun)->sun_path[len] = '\0';
1608 	}
1609 	if (pathlen != NULL)
1610 		*pathlen = len;
1611 
1612 	return 0;
1613 }
1614