/*	$OpenBSD: uipc_usrreq.c,v 1.24 2003/08/17 22:59:42 tedu Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>

/*
 * Unix communications domain.
 *
 * TODO:
 *	SEQPACKET, RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
ino_t	unp_ino;			/* prototype for fake inode numbers */

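/*
 * Protocol user-request handler for the Unix domain.  Dispatches the
 * PRU_* requests issued through the protocol switch to the routines
 * below, operating on the unpcb attached to the socket.
 */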
/*ARGSUSED*/
int
uipc_usrreq(so, req, m, nam, control)
	struct socket *so;
	int req;
	struct mbuf *m, *nam, *control;
{
	struct unpcb *unp = sotounpcb(so);
	register struct socket *so2;
	register int error = 0;
	struct proc *p = curproc;	/* XXX */

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == 0 && req != PRU_ATTACH) {
		error = EINVAL;
		goto release;
	}
	switch (req) {

	case PRU_ATTACH:
		if (unp) {
			error = EISCONN;
			break;
		}
		error = unp_attach(so);
		break;

	case PRU_DETACH:
		unp_detach(unp);
		break;

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		if (unp->unp_vnode == 0)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		error = unp_connect2(so, (struct socket *)nam);
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			nam->m_len = unp->unp_conn->unp_addr->m_len;
			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
			    mtod(nam, caddr_t), (unsigned)nam->m_len);
		} else {
			nam->m_len = sizeof(sun_noname);
			*(mtod(nam, struct sockaddr *)) = sun_noname;
		}
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
#define	rcv (&so->so_rcv)
#define snd (&so2->so_snd)
			if (unp->unp_conn == 0)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wake up any process waiting to write.
			 */
			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
			unp->unp_mbcnt = rcv->sb_mbcnt;
			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
			unp->unp_cc = rcv->sb_cc;
			sowwakeup(so2);
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		if (control && (error = unp_internalize(control, p)))
			break;
		switch (so->so_type) {

		case SOCK_DGRAM: {
			struct sockaddr *from;

			if (nam) {
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == 0) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				m = 0;
				control = 0;
			} else
				error = ENOBUFS;
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
#define	rcv (&so2->so_rcv)
#define	snd (&so->so_snd)
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == 0)
				panic("uipc 3");
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then reduce
			 * send buffer hiwater marks to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(rcv, m, control))
					control = 0;
			} else
				sbappend(rcv, m);
			snd->sb_mbmax -=
			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
			unp->unp_conn->unp_cc = rcv->sb_cc;
			sorwakeup(so2);
			m = 0;
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 4");
		}
		break;

	case PRU_ABORT:
		unp_drop(unp, ECONNABORTED);
		break;

	case PRU_SENSE:
		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
			so2 = unp->unp_conn->unp_socket;
			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
		}
		((struct stat *) m)->st_dev = NODEV;
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		((struct stat *) m)->st_atimespec =
		    ((struct stat *) m)->st_mtimespec =
		    ((struct stat *) m)->st_ctimespec = unp->unp_ctime;
		((struct stat *) m)->st_ino = unp->unp_ino;
		return (0);

	case PRU_RCVOOB:
		return (EOPNOTSUPP);

	case PRU_SENDOOB:
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		if (unp->unp_addr) {
			nam->m_len = unp->unp_addr->m_len;
			bcopy(mtod(unp->unp_addr, caddr_t),
			    mtod(nam, caddr_t), (unsigned)nam->m_len);
		} else
			nam->m_len = 0;
		break;

	case PRU_PEERADDR:
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			nam->m_len = unp->unp_conn->unp_addr->m_len;
			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
			    mtod(nam, caddr_t), (unsigned)nam->m_len);
		} else
			nam->m_len = 0;
		break;

	case PRU_PEEREID:
		if (unp->unp_flags & UNP_FEIDS) {
			nam->m_len = sizeof(struct unpcbid);
			bcopy((caddr_t)(&(unp->unp_connid)),
			    mtod(nam, caddr_t), (unsigned)nam->m_len);
		} else
			nam->m_len = 0;
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("uipc_usrreq");
	}
release:
	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;
u_long	unpst_recvspace = PIPSIZ;
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;

int	unp_rights;			/* file descriptors in flight */

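/*
 * Create a Unix domain PCB: reserve buffer space appropriate to the
 * socket type, allocate and zero an unpcb, link it to the socket and
 * record the creation time.
 */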
int
unp_attach(so)
	struct socket *so;
{
	register struct unpcb *unp;
	struct timeval tv;
	int error;

	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT);
	if (unp == NULL)
		return (ENOBUFS);
	bzero((caddr_t)unp, sizeof(*unp));
	unp->unp_socket = so;
	so->so_pcb = unp;
	microtime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, &unp->unp_ctime);
	return (0);
}

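/*
 * Tear down a Unix domain PCB: release any bound vnode, disconnect
 * from the peer and from any datagram senders still referencing us,
 * then free the unpcb.  If descriptors are in flight, flush our
 * receive buffer now and run the garbage collector.
 */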
void
unp_detach(unp)
	register struct unpcb *unp;
{

	if (unp->unp_vnode) {
		unp->unp_vnode->v_socket = 0;
		vrele(unp->unp_vnode);
		unp->unp_vnode = 0;
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	while (unp->unp_refs)
		unp_drop(unp->unp_refs, ECONNRESET);
	soisdisconnected(unp->unp_socket);
	unp->unp_socket->so_pcb = 0;
	m_freem(unp->unp_addr);
	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later,
		 * in sofree, but if our receive buffer holds references
		 * to descriptors that are now garbage, we will dispose
		 * of those descriptor references after the garbage collector
		 * gets them (resulting in a "panic: closef: count < 0").
		 */
		sorflush(unp->unp_socket);
		free(unp, M_PCB);
		unp_gc();
	} else
		free(unp, M_PCB);
}

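/*
 * Bind a name (a filesystem path) to a Unix domain socket by creating
 * a VSOCK vnode for it.  Fails if the socket is already bound or the
 * path already exists.
 */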
int
unp_bind(unp, nam, p)
	struct unpcb *unp;
	struct mbuf *nam;
	struct proc *p;
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	register struct vnode *vp;
	struct vattr vattr;
	int error, namelen;
	struct nameidata nd;
	char buf[MLEN];

	if (unp->unp_vnode != NULL)
		return (EINVAL);
	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0 || namelen >= MLEN)
		return EINVAL;
	strncpy(buf, soun->sun_path, namelen);
	buf[namelen] = 0;       /* null-terminate the string */
	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, buf, p);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EADDRINUSE);
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	if (error)
		return (error);
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
	VOP_UNLOCK(vp, 0, p);
	return (0);
}

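/*
 * Connect to the socket bound at the path given in nam.  For
 * connection-oriented sockets a new server socket is spawned with
 * sonewconn() and the connecting process's effective credentials
 * are recorded for later retrieval via PRU_PEEREID.
 */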
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	register struct vnode *vp;
	register struct socket *so2, *so3;
	struct unpcb *unp2, *unp3;
	int error;
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
			return (EMSGSIZE);
	} else
		*(mtod(nam, caddr_t) + nam->m_len) = 0;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == 0) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copy(unp2->unp_addr, 0, (int)M_COPYALL);
		unp3->unp_connid.unp_euid = p->p_ucred->cr_uid;
		unp3->unp_connid.unp_egid = p->p_ucred->cr_gid;
		unp3->unp_flags |= UNP_FEIDS;
		so2 = so3;
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
	return (error);
}

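/*
 * Hook two Unix domain sockets together.  Datagram sockets are added
 * to the peer's reference list; stream sockets are cross-linked and
 * both ends are marked connected.
 */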
int
unp_connect2(so, so2)
	register struct socket *so;
	register struct socket *so2;
{
	register struct unpcb *unp = sotounpcb(so);
	register struct unpcb *unp2;

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		unp->unp_nextref = unp2->unp_refs;
		unp2->unp_refs = unp;
		soisconnected(so);
		break;

	case SOCK_STREAM:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

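/*
 * Undo a connection: for datagram sockets, remove ourselves from the
 * peer's reference list; for stream sockets, mark both ends
 * disconnected.
 */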
void
unp_disconnect(unp)
	struct unpcb *unp;
{
	register struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == 0)
		return;
	unp->unp_conn = 0;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		if (unp2->unp_refs == unp)
			unp2->unp_refs = unp->unp_nextref;
		else {
			unp2 = unp2->unp_refs;
			for (;;) {
				if (unp2 == 0)
					panic("unp_disconnect");
				if (unp2->unp_nextref == unp)
					break;
				unp2 = unp2->unp_nextref;
			}
			unp2->unp_nextref = unp->unp_nextref;
		}
		unp->unp_nextref = 0;
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

#ifdef notdef
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif

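/*
 * On shutdown of a connected stream socket, tell the peer it will
 * receive no more data.
 */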
void
unp_shutdown(unp)
	struct unpcb *unp;
{
	struct socket *so;

	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
	    (so = unp->unp_conn->unp_socket))
		socantrcvmore(so);
}

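/*
 * Report an error on the socket and disconnect it.  If the socket is
 * still sitting on a listen queue, free it and its unpcb here.
 */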
void
unp_drop(unp, errno)
	struct unpcb *unp;
	int errno;
{
	struct socket *so = unp->unp_socket;

	so->so_error = errno;
	unp_disconnect(unp);
	if (so->so_head) {
		so->so_pcb = 0;
		sofree(so);
		m_freem(unp->unp_addr);
		free(unp, M_PCB);
	}
}

#ifdef notdef
unp_drain()
{

}
#endif

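/*
 * Convert a received SCM_RIGHTS control message from internal form
 * (struct file pointers) to file descriptors in the receiving
 * process, allocating descriptor slots and dropping the in-flight
 * accounting.  On failure the passed files are discarded.
 */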
int
unp_externalize(rights)
	struct mbuf *rights;
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	int i, *fdp;
	struct file **rp;
	struct file *fp;
	int nfds, error = 0;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct file *);
	rp = (struct file **)CMSG_DATA(cm);

	fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);

#ifdef notyet
	/* Make sure the recipient should be able to see the descriptors.. */
	if (p->p_cwdi->cwdi_rdir != NULL) {
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < nfds; i++) {
			fp = *rp++;
			/*
			 * If we are in a chroot'ed directory, and
			 * someone wants to pass us a directory, make
			 * sure it's inside the subtree we're allowed
			 * to access.
			 */
			if (fp->f_type == DTYPE_VNODE) {
				struct vnode *vp = (struct vnode *)fp->f_data;
				if ((vp->v_type == VDIR) &&
				    !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) {
					error = EPERM;
					break;
				}
			}
		}
	}
#endif

restart:
	if (error != 0) {
		rp = ((struct file **)CMSG_DATA(cm));
		for (i = 0; i < nfds; i++) {
			fp = *rp;
			/*
			 * zero the pointer before calling unp_discard,
			 * since it may end up in unp_gc()..
			 */
			*rp++ = 0;
			unp_discard(fp);
		}
		goto out;
	}

	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct file **)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		bcopy(rp, &fp, sizeof(fp));
		rp++;
		if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(p->p_fd, fdp[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				error = 0;
			} else {
				/*
				 * This is the error that has historically
				 * been returned, and some callers may
				 * expect it.
				 */
				error = EMSGSIZE;
			}
			goto restart;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.. We finalize it all
		 * in the loop below.
		 */
		p->p_fd->fd_ofiles[fdp[i]] = fp;
	}

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct file **)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		fp = *rp++;
		fp->f_msgcount--;
		unp_rights--;
	}

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_SPACE(nfds * sizeof(int));
 out:
	free(fdp, M_TEMP);
	return (error);
}

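/*
 * Convert an SCM_RIGHTS control message from file descriptors to
 * internal form (struct file pointers), growing the mbuf into a
 * cluster if needed and bumping each file's reference and message
 * counts.
 */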
int
unp_internalize(control, p)
	struct mbuf *control;
	struct proc *p;
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct file **rp, *fp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    cm->cmsg_len != control->m_len)
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
	    control->m_len;
	if (neededspace > M_TRAILINGSPACE(control)) {
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT)
			return (E2BIG);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0)
			return (ENOBUFS);       /* allocation failed */

		/* copy the data to the cluster */
		memcpy(mtod(control, char *), cm, cm->cmsg_len);
		cm = mtod(control, struct cmsghdr *);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));

	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1;
	for (i = 0; i < nfds; i++) {
		bcopy(ip, &fd, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count == LONG_MAX-2 ||
		    fp->f_msgcount == LONG_MAX-2) {
			error = EDEADLK;
			goto fail;
		}
		bcopy(&fp, rp, sizeof fp);
		rp--;
		fp->f_count++;
		fp->f_msgcount++;
		unp_rights++;
	}
	return (0);
fail:
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		bcopy(rp, &fp, sizeof(fp));
		rp++;
		fp->f_count--;
		fp->f_msgcount--;
		unp_rights--;
	}

	return (error);
}

int	unp_defer, unp_gcing;
extern	struct domain unixdomain;

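/*
 * Garbage-collect descriptors that are only referenced by in-flight
 * SCM_RIGHTS messages.  Uses a mark-and-sweep over the file table:
 * files reachable from a process or from a marked socket's receive
 * buffer are marked, and the unreachable remainder are flushed and
 * closed (see the long comment below).
 */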
void
unp_gc()
{
	register struct file *fp, *nextfp;
	register struct socket *so;
	struct file **extra_ref, **fpp;
	int nunref, i;

	if (unp_gcing)
		return;
	unp_gcing = 1;
	unp_defer = 0;
	LIST_FOREACH(fp, &filehead, f_list)
		fp->f_flag &= ~(FMARK|FDEFER);
	do {
		LIST_FOREACH(fp, &filehead, f_list) {
			if (fp->f_flag & FDEFER) {
				fp->f_flag &= ~FDEFER;
				unp_defer--;
			} else {
				if (fp->f_count == 0)
					continue;
				if (fp->f_flag & FMARK)
					continue;
				if (fp->f_count == fp->f_msgcount)
					continue;
			}
			fp->f_flag |= FMARK;

			if (fp->f_type != DTYPE_SOCKET ||
			    (so = (struct socket *)fp->f_data) == 0)
				continue;
			if (so->so_proto->pr_domain != &unixdomain ||
			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
				continue;
#ifdef notdef
			if (so->so_rcv.sb_flags & SB_LOCK) {
				/*
				 * This is problematical; it's not clear
				 * we need to wait for the sockbuf to be
				 * unlocked (on a uniprocessor, at least),
				 * and it's also not clear what to do
				 * if sbwait returns an error due to receipt
				 * of a signal.  If sbwait does return
				 * an error, we'll go into an infinite
				 * loop.  Delete all of this for now.
				 */
				(void) sbwait(&so->so_rcv);
				goto restart;
			}
#endif
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the original code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code unp_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.   Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
	for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0;
	    fp = nextfp) {
		nextfp = LIST_NEXT(fp, f_list);
		if (fp->f_count == 0)
			continue;
		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
			*fpp++ = fp;
			nunref++;
			FREF(fp);
			fp->f_count++;
		}
	}
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
		if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL)
			sorflush((struct socket *)(*fpp)->f_data);
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
		(void) closef(*fpp, NULL);
	free((caddr_t)extra_ref, M_FILE);
	unp_gcing = 0;
}

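/*
 * Dispose of any rights carried in the mbuf chain of a socket buffer
 * that is being torn down.
 */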
void
unp_dispose(m)
	struct mbuf *m;
{

	if (m)
		unp_scan(m, unp_discard, 1);
}

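/*
 * Walk a chain of control mbufs and apply op to every file pointer
 * found in SCM_RIGHTS messages; if discard is set, clear each pointer
 * as it is handed off.
 */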
void
unp_scan(m0, op, discard)
	struct mbuf *m0;
	void (*op)(struct file *);
	int discard;
{
	struct mbuf *m;
	struct file **rp, *fp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct file *);
				rp = (struct file **)CMSG_DATA(cm);
				for (i = 0; i < qfds; i++) {
					fp = *rp;
					if (discard)
						*rp = 0;
					(*op)(fp);
					rp++;
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

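/*
 * Mark a file as reachable for the garbage collector.  Sockets are
 * deferred so that the rights queued in their receive buffers get
 * scanned on the next pass.
 */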
void
unp_mark(fp)
	struct file *fp;
{

	if (fp->f_flag & FMARK)
		return;

	if (fp->f_flag & FDEFER)
		return;

	if (fp->f_type == DTYPE_SOCKET) {
		unp_defer++;
		fp->f_flag |= FDEFER;
	} else {
		fp->f_flag |= FMARK;
	}
}

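/*
 * Release one in-flight reference to a file that is being thrown
 * away, and close it.
 */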
void
unp_discard(fp)
	struct file *fp;
{

	if (fp == NULL)
		return;
	FREF(fp);
	fp->f_msgcount--;
	unp_rights--;
	(void) closef(fp, NULL);
}