/*	$OpenBSD: uipc_usrreq.c,v 1.206 2024/05/03 17:43:09 mvs Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/refcnt.h>

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global data and struct members:
 *      I       immutable after creation
 *      D       unp_df_lock
 *      G       unp_gc_lock
 *      M       unp_ino_mtx
 *      R       unp_rights_mtx
 *      a       atomic
 *      s       socket lock
 */

struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");

struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct	unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] */
};

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_remove_gcrefs(struct fdpass *, int);
void	unp_restore_gcrefs(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
static inline void unp_ref(struct unpcb *);
static inline void unp_rele(struct unpcb *);
struct socket *unp_solock_peer(struct socket *);

struct pool unpcb_pool;
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [G] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
	LIST_HEAD_INITIALIZER(unp_head);
/* [D] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
	SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [M] prototype for fake inode numbers */
int	unp_rights;	/* [R] file descriptors in flight */
int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [G] GC task currently running */

const struct pr_usrreqs uipc_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_accept	= uipc_accept,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_shutdown,
	.pru_rcvd	= uipc_rcvd,
	.pru_send	= uipc_send,
	.pru_abort	= uipc_abort,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

const struct pr_usrreqs uipc_dgram_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_dgram_shutdown,
	.pru_send	= uipc_dgram_send,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

static inline void
unp_ref(struct unpcb *unp)
{
	refcnt_take(&unp->unp_refcnt);
}

static inline void
unp_rele(struct unpcb *unp)
{
	refcnt_rele_wake(&unp->unp_refcnt);
}

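/*
 * Lock the peer of an already locked socket.  To avoid a deadlock
 * against a thread locking in the opposite direction, both sockets
 * are always locked in address order.  If the peer's address sorts
 * before `so', `so' is temporarily released, both sockets are
 * relocked in order and the connection is re-checked, since a
 * datagram socket may have been reconnected in the meantime.
 * Returns the locked peer, or NULL if there is none.
 */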
struct socket *
unp_solock_peer(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;

	unp = so->so_pcb;

again:
	if ((unp2 = unp->unp_conn) == NULL)
		return NULL;

	so2 = unp2->unp_socket;

	if (so < so2)
		solock(so2);
	else if (so > so2) {
		unp_ref(unp2);
		sounlock(so);
		solock(so2);
		solock(so);

		/* Datagram socket could be reconnected due to re-lock. */
		if (unp->unp_conn != unp2) {
			sounlock(so2);
			unp_rele(unp2);
			goto again;
		}

		unp_rele(unp2);
	}

	return so2;
}

void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};

int
uipc_attach(struct socket *so, int proto, int wait)
{
	struct unpcb *unp;
	int error;

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	refcnt_init(&unp->unp_refcnt);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	return (0);
}

int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

int
uipc_bind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `solock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket);

	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_CREATE;

	KERNEL_LOCK();
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

int
uipc_listen(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode == NULL)
		return (EINVAL);
	return (0);
}

int
uipc_connect(struct socket *so, struct mbuf *nam)
{
	return unp_connect(so, nam, curproc);
}

int
uipc_accept(struct socket *so, struct mbuf *nam)
{
	struct socket *so2;
	struct unpcb *unp = sotounpcb(so);

	/*
	 * Pass back name of connected socket, if it was bound and
	 * we are still connected (our peer may have closed already!).
	 */
	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);

	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_disconnect(unp);
	return (0);
}

int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	socantsendmore(so);

	if (unp->unp_conn != NULL) {
		so2 = unp->unp_conn->unp_socket;
		socantrcvmore(so2);
	}

	return (0);
}

int
uipc_dgram_shutdown(struct socket *so)
{
	socantsendmore(so);
	return (0);
}

void
uipc_rcvd(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp->unp_conn == NULL)
		return;
	so2 = unp->unp_conn->unp_socket;

	/*
	 * Adjust backpressure on sender
	 * and wakeup any waiting to write.
	 */
	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&so2->so_snd.sb_mtx);
	so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
	so2->so_snd.sb_cc = so->so_rcv.sb_cc;
	mtx_leave(&so2->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);
	sowwakeup(so2);
}

int
uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int error = 0, dowakeup = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	if (unp->unp_conn == NULL) {
		error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	/*
	 * Send to paired receive port, and then raise
	 * send buffer counts to maintain backpressure.
	 * Wake up readers.
	 */
	/*
	 * sbappend*() should be serialized together
	 * with so_snd modification.
	 */
	mtx_enter(&so2->so_rcv.sb_mtx);
	mtx_enter(&so->so_snd.sb_mtx);
	if (so->so_snd.sb_state & SS_CANTSENDMORE) {
		mtx_leave(&so->so_snd.sb_mtx);
		mtx_leave(&so2->so_rcv.sb_mtx);
		error = EPIPE;
		goto dispose;
	}
	if (control) {
		if (sbappendcontrol(so2, &so2->so_rcv, m, control)) {
			control = NULL;
		} else {
			mtx_leave(&so->so_snd.sb_mtx);
			mtx_leave(&so2->so_rcv.sb_mtx);
			error = ENOBUFS;
			goto dispose;
		}
	} else if (so->so_type == SOCK_SEQPACKET)
		sbappendrecord(so2, &so2->so_rcv, m);
	else
		sbappend(so2, &so2->so_rcv, m);
	so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
	so->so_snd.sb_cc = so2->so_rcv.sb_cc;
	if (so2->so_rcv.sb_cc > 0)
		dowakeup = 1;
	mtx_leave(&so->so_snd.sb_mtx);
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);

	m = NULL;

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

int
uipc_dgram_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	const struct sockaddr *from;
	int error = 0, dowakeup = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	if (nam) {
		if (unp->unp_conn) {
			error = EISCONN;
			goto dispose;
		}
		error = unp_connect(so, nam, curproc);
		if (error)
			goto dispose;
	}

	if (unp->unp_conn == NULL) {
		if (nam != NULL)
			error = ECONNREFUSED;
		else
			error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	if (unp->unp_addr)
		from = mtod(unp->unp_addr, struct sockaddr *);
	else
		from = &sun_noname;

	mtx_enter(&so2->so_rcv.sb_mtx);
	if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
		dowakeup = 1;
		m = NULL;
		control = NULL;
	} else
		error = ENOBUFS;
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);
	if (nam)
		unp_disconnect(unp);

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

void
uipc_abort(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_detach(unp);
	sofree(so, 0);
}

int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp = sotounpcb(so);

	sb->st_blksize = so->so_snd.sb_hiwat;
	sb->st_dev = NODEV;
	mtx_enter(&unp_ino_mtx);
	if (unp->unp_ino == 0)
		unp->unp_ino = unp_ino++;
	mtx_leave(&unp_ino_mtx);
	sb->st_atim.tv_sec =
	    sb->st_mtim.tv_sec =
	    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
	sb->st_atim.tv_nsec =
	    sb->st_mtim.tv_nsec =
	    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
	sb->st_ino = unp->unp_ino;

	return (0);
}

int
uipc_sockaddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);

	uipc_setaddr(unp, nam);
	return (0);
}

int
uipc_peeraddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);
	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so), *unp2;
	int error;

	if ((error = unp_connect2(so, so2)))
		return (error);

	unp->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp->unp_connid.pid = curproc->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDS;
	unp2 = sotounpcb(so2);
	unp2->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp2->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp2->unp_connid.pid = curproc->p_p->ps_pid;
	unp2->unp_flags |= UNP_FEIDS;

	return (0);
}

int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}

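/*
 * Tear down a pcb: unbind it from its vnode (keeping the
 * `i_lock' -> `solock()' order), take it off the GC list,
 * disconnect it and every datagram socket still connected to it,
 * wait for the last pcb reference to drain, then free the pcb.
 * The GC task is kicked in case descriptors are still in flight.
 */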
void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;
	struct unpcb *unp2;

	unp->unp_vnode = NULL;

	/*
	 * Enforce `i_lock' -> `solock()' lock order.
	 */
	sounlock(so);

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
	}

	solock(so);

	if (unp->unp_conn != NULL) {
		/*
		 * Datagram socket could be connected to itself.
		 * Such socket will be disconnected here.
		 */
		unp_disconnect(unp);
	}

	while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) {
		struct socket *so2 = unp2->unp_socket;

		if (so < so2)
			solock(so2);
		else {
			unp_ref(unp2);
			sounlock(so);
			solock(so2);
			solock(so);

			if (unp2->unp_conn != unp) {
				/* `unp2' was disconnected due to re-lock. */
				sounlock(so2);
				unp_rele(unp2);
				continue;
			}

			unp_rele(unp2);
		}

		unp2->unp_conn = NULL;
		SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref);
		so2->so_error = ECONNRESET;
		so2->so_state &= ~SS_ISCONNECTED;

		sounlock(so2);
	}

	sounlock(so);
	refcnt_finalize(&unp->unp_refcnt, "unpfinal");
	solock(so);

	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}

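/*
 * Connect to the socket bound at the given filesystem path.  The
 * path is resolved with namei() and checked for write access.  For
 * connection-oriented sockets a fresh server socket is spawned from
 * the listener with sonewconn(); datagram sockets are spliced to
 * the bound socket directly.
 */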
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_WRITE;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `solock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		solock(so2);

		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, M_WAIT)) == NULL) {
			error = ECONNREFUSED;
		}

		sounlock(so2);

		if (error != 0)
			goto put;

		/*
		 * Since `so2' is protected by vnode(9) lock, `so3'
		 * can't be PRU_ABORT'ed here.
		 */
		solock_pair(so, so3);

		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);

		/*
		 * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag
		 * are immutable since we set them in uipc_bind().
		 */
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;

		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}

		so2 = so3;
	} else {
		if (so2 != so)
			solock_pair(so, so2);
		else
			solock(so);
	}

	error = unp_connect2(so, so2);

	sounlock(so);

	/*
	 * `so2' can't be PRU_ABORT'ed concurrently
	 */
	if (so2 != so)
		sounlock(so2);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by concurrent thread
	 * when `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}

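/*
 * Splice two sockets together.  A datagram socket is added to the
 * peer's `unp_refs' list, so a single bound socket can have many
 * senders connected to it; stream and seqpacket sockets are paired
 * symmetrically.
 */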
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	soassertlocked(so);
	soassertlocked(so2);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

void
unp_disconnect(struct unpcb *unp)
{
	struct socket *so2;
	struct unpcb *unp2;

	if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
		return;

	unp2 = unp->unp_conn;
	unp->unp_conn = NULL;

	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}

	if (so2 != unp->unp_socket)
		sounlock(so2);
}

static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

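/*
 * Turn a received SCM_RIGHTS control message back into file
 * descriptor numbers: every in-kernel struct fdpass is checked
 * against pledge(2) and chroot(2) restrictions, installed into a
 * freshly allocated slot of the receiving process' descriptor
 * table, and the message is rewritten in place as an array of ints.
 */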
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient is allowed to see the descriptors. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.  We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Keep `fdp' locked to prevent concurrent close() of just
	 * inserted descriptors. Such descriptors could have the only
	 * `f_count' reference which is now shared between control
	 * message and `fdp'.
	 */

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
 out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required. We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}

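/*
 * Turn the array of ints in an SCM_RIGHTS control message into the
 * in-kernel form, an array of referenced struct fdpass records,
 * rewriting the message in place (and growing the mbuf into a
 * cluster if needed).  For reference, a sender typically builds
 * such a message along these lines (illustrative userland sketch;
 * `s' is the UNIX socket, `fd' the descriptor being passed):
 *
 *	union {
 *		struct cmsghdr hdr;
 *		unsigned char buf[CMSG_SPACE(sizeof(int))];
 *	} cmsgbuf;
 *	struct msghdr msg;
 *	struct cmsghdr *cmp;
 *
 *	memset(&msg, 0, sizeof(msg));
 *	msg.msg_control = cmsgbuf.buf;
 *	msg.msg_controllen = sizeof(cmsgbuf.buf);
 *	cmp = CMSG_FIRSTHDR(&msg);
 *	cmp->cmsg_len = CMSG_LEN(sizeof(int));
 *	cmp->cmsg_level = SOL_SOCKET;
 *	cmp->cmsg_type = SCM_RIGHTS;
 *	memcpy(CMSG_DATA(cmp), &fd, sizeof(int));
 *	sendmsg(s, &msg, 0);
 */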
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;       /* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
#if NKCOV > 0
		/* kcov descriptors cannot be copied */
		if (fp->f_type == DTYPE_VNODE && kcov_vnode(fp->f_data)) {
			error = EINVAL;
			goto fail;
		}
#endif
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}

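/*
 * Garbage collect sockets that are only referenced by SCM_RIGHTS
 * messages which can never be received.  First close everything on
 * the deferred list, then mark and sweep: a socket whose file has
 * no references besides in-flight messages (`f_count' equal to
 * `unp_msgcount') is a candidate; candidates reachable from a live
 * socket are revived iteratively; the receive buffers of whatever
 * remains dead are flushed, dropping the final references.
 */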
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_gc_lock);
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	nunref = 0;

	rw_enter_write(&unp_gc_lock);

	/*
	 * Determine sockets which may be prospectively dead. Such
	 * sockets have their `unp_msgcount' equal to the `f_count'.
	 * If `unp_msgcount' is 0, the socket has not been passed
	 * and can't be unreferenced.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		unp->unp_gcflags = 0;

		if (unp->unp_msgcount == 0)
			continue;
		if ((fp = unp->unp_file) == NULL)
			continue;
		if (fp->f_count == unp->unp_msgcount) {
			unp->unp_gcflags |= UNP_GCDEAD;
			unp->unp_gcrefs = unp->unp_msgcount;
			nunref++;
		}
	}

	/*
	 * Scan all sockets previously marked as dead. Remove
	 * the `unp_gcrefs' reference each socket holds on any
	 * dead socket in its buffer.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
			continue;
		so = unp->unp_socket;
		mtx_enter(&so->so_rcv.sb_mtx);
		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
		mtx_leave(&so->so_rcv.sb_mtx);
	}

	/*
	 * If the dead socket has `unp_gcrefs' reference counter
	 * greater than 0, it can't be unreferenced. Mark it as
	 * alive and increment the `unp_gcrefs' reference for each
	 * dead socket within its buffer. Repeat this until we
	 * have no new alive sockets found.
	 */
	do {
		unp_defer = 0;

		LIST_FOREACH(unp, &unp_head, unp_link) {
			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
				continue;
			if (unp->unp_gcrefs == 0)
				continue;

			unp->unp_gcflags &= ~UNP_GCDEAD;

			so = unp->unp_socket;
			mtx_enter(&so->so_rcv.sb_mtx);
			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
			mtx_leave(&so->so_rcv.sb_mtx);

			KASSERT(nunref > 0);
			nunref--;
		}
	} while (unp_defer > 0);

	/*
	 * If there are any unreferenced sockets, then, for each one,
	 * dispose of the files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				struct sockbuf *sb = &unp->unp_socket->so_rcv;
				struct mbuf *m;

				/*
				 * This socket could still be connected
				 * and if so its `so_rcv' is still
				 * accessible to a concurrent PRU_SEND
				 * thread.
				 */

				mtx_enter(&sb->sb_mtx);
				m = sb->sb_mb;
				memset(&sb->sb_startzero, 0,
				    (caddr_t)&sb->sb_endzero -
				        (caddr_t)&sb->sb_startzero);
				sb->sb_timeo_nsecs = INFSLP;
				mtx_leave(&sb->sb_mtx);

				unp_scan(m, unp_discard);
				m_purge(m);
			}
		}
	}

	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}

void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

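/*
 * Walk a chain of mbuf packets looking for SCM_RIGHTS control
 * messages and apply `op' to the struct fdpass array carried in
 * each one.
 */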
void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

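/*
 * Take ownership of a set of passed files that will never be
 * received and queue them for the GC task, which performs the
 * actual closef() calls asynchronously, outside the caller's
 * locking context.
 */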
void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);

	rw_enter_write(&unp_df_lock);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
	rw_exit_write(&unp_df_lock);

	task_add(systqmp, &unp_gc_task);
}

void
unp_remove_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			KASSERT(unp->unp_gcrefs > 0);
			unp->unp_gcrefs--;
		}
	}
}

void
unp_restore_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			unp->unp_gcrefs++;
			unp_defer++;
		}
	}
}

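/*
 * Validate an AF_UNIX name carried in an mbuf and return it as a
 * sockaddr_un.  Guarantees that sun_path is NUL terminated,
 * appending the terminator when it is missing and the mbuf has
 * room for one more byte.
 */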
int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}