xref: /openbsd/sys/kern/uipc_syscalls.c (revision 771fbea0)
1 /*	$OpenBSD: uipc_syscalls.c,v 1.192 2021/06/02 11:30:23 mvs Exp $	*/
2 /*	$NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/filedesc.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/file.h>
41 #include <sys/ioctl.h>
42 #include <sys/malloc.h>
43 #include <sys/event.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/signalvar.h>
49 #include <sys/pledge.h>
50 #include <sys/unpcb.h>
51 #include <sys/un.h>
52 #ifdef KTRACE
53 #include <sys/ktrace.h>
54 #endif
55 
56 #include <sys/mount.h>
57 #include <sys/syscallargs.h>
58 
59 #include <sys/domain.h>
60 #include <netinet/in.h>
61 #include <net/route.h>
62 
63 int	copyaddrout(struct proc *, struct mbuf *, struct sockaddr *, socklen_t,
64 	    socklen_t *);
65 
66 int
67 sys_socket(struct proc *p, void *v, register_t *retval)
68 {
69 	struct sys_socket_args /* {
70 		syscallarg(int) domain;
71 		syscallarg(int) type;
72 		syscallarg(int) protocol;
73 	} */ *uap = v;
74 	struct filedesc *fdp = p->p_fd;
75 	struct socket *so;
76 	struct file *fp;
77 	int type = SCARG(uap, type);
78 	int domain = SCARG(uap, domain);
79 	int fd, cloexec, nonblock, fflag, error;
80 	unsigned int ss = 0;
81 
82 	if ((type & SOCK_DNS) && !(domain == AF_INET || domain == AF_INET6))
83 		return (EINVAL);
84 
85 	if (ISSET(type, SOCK_DNS))
86 		ss |= SS_DNS;
87 	error = pledge_socket(p, domain, ss);
88 	if (error)
89 		return (error);
90 
91 	type &= ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_DNS);
92 	cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
93 	nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
94 	fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
95 
96 	error = socreate(SCARG(uap, domain), &so, type, SCARG(uap, protocol));
97 	if (error)
98 		return (error);
99 
100 	fdplock(fdp);
101 	error = falloc(p, &fp, &fd);
102 	if (error) {
103 		fdpunlock(fdp);
104 		soclose(so, MSG_DONTWAIT);
105 	} else {
106 		fp->f_flag = fflag;
107 		fp->f_type = DTYPE_SOCKET;
108 		fp->f_ops = &socketops;
109 		so->so_state |= ss;
110 		fp->f_data = so;
111 		fdinsert(fdp, fd, cloexec, fp);
112 		fdpunlock(fdp);
113 		FRELE(fp, p);
114 		*retval = fd;
115 	}
116 	return (error);
117 }
118 
119 static inline int
120 isdnssocket(struct socket *so)
121 {
122 	return (so->so_state & SS_DNS);
123 }
124 
125 /* For SS_DNS sockets, only allow port DNS (port 53) */
126 static int
127 dns_portcheck(struct proc *p, struct socket *so, void *nam, u_int *namelen)
128 {
129 	int error = EINVAL;
130 
131 	switch (so->so_proto->pr_domain->dom_family) {
132 	case AF_INET:
133 		if (*namelen < sizeof(struct sockaddr_in))
134 			break;
135 		if (((struct sockaddr_in *)nam)->sin_port == htons(53))
136 			error = 0;
137 		break;
138 #ifdef INET6
139 	case AF_INET6:
140 		if (*namelen < sizeof(struct sockaddr_in6))
141 			break;
142 		if (((struct sockaddr_in6 *)nam)->sin6_port == htons(53))
143 			error = 0;
144 #endif
145 	}
146 	if (error && p->p_p->ps_flags & PS_PLEDGE)
147 		return (pledge_fail(p, EPERM, PLEDGE_DNS));
148 	return error;
149 }
150 
151 int
152 sys_bind(struct proc *p, void *v, register_t *retval)
153 {
154 	struct sys_bind_args /* {
155 		syscallarg(int) s;
156 		syscallarg(const struct sockaddr *) name;
157 		syscallarg(socklen_t) namelen;
158 	} */ *uap = v;
159 	struct file *fp;
160 	struct mbuf *nam;
161 	struct socket *so;
162 	int s, error;
163 
164 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
165 		return (error);
166 	so = fp->f_data;
167 	error = pledge_socket(p, so->so_proto->pr_domain->dom_family,
168 	    so->so_state);
169 	if (error)
170 		goto out;
171 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
172 	    MT_SONAME);
173 	if (error)
174 		goto out;
175 #ifdef KTRACE
176 	if (KTRPOINT(p, KTR_STRUCT))
177 		ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
178 #endif
179 	s = solock(so);
180 	error = sobind(so, nam, p);
181 	sounlock(so, s);
182 	m_freem(nam);
183 out:
184 	FRELE(fp, p);
185 	return (error);
186 }
187 
188 int
189 sys_listen(struct proc *p, void *v, register_t *retval)
190 {
191 	struct sys_listen_args /* {
192 		syscallarg(int) s;
193 		syscallarg(int) backlog;
194 	} */ *uap = v;
195 	struct file *fp;
196 	struct socket *so;
197 	int s, error;
198 
199 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
200 		return (error);
201 	so = fp->f_data;
202 	s = solock(so);
203 	error = solisten(so, SCARG(uap, backlog));
204 	sounlock(so, s);
205 	FRELE(fp, p);
206 	return (error);
207 }
208 
209 int
210 sys_accept(struct proc *p, void *v, register_t *retval)
211 {
212 	struct sys_accept_args /* {
213 		syscallarg(int) s;
214 		syscallarg(struct sockaddr *) name;
215 		syscallarg(socklen_t *) anamelen;
216 	} */ *uap = v;
217 
218 	return (doaccept(p, SCARG(uap, s), SCARG(uap, name),
219 	    SCARG(uap, anamelen), SOCK_NONBLOCK_INHERIT, retval));
220 }
221 
222 int
223 sys_accept4(struct proc *p, void *v, register_t *retval)
224 {
225 	struct sys_accept4_args /* {
226 		syscallarg(int) s;
227 		syscallarg(struct sockaddr *) name;
228 		syscallarg(socklen_t *) anamelen;
229 		syscallarg(socklen_t *) int flags;
230 	} */ *uap = v;
231 
232 	if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
233 		return (EINVAL);
234 
235 	return (doaccept(p, SCARG(uap, s), SCARG(uap, name),
236 	    SCARG(uap, anamelen), SCARG(uap, flags), retval));
237 }
238 
239 int
240 doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
241     int flags, register_t *retval)
242 {
243 	struct filedesc *fdp = p->p_fd;
244 	struct file *fp, *headfp;
245 	struct mbuf *nam;
246 	socklen_t namelen;
247 	int error, s, tmpfd;
248 	struct socket *head, *so;
249 	int cloexec, nflag;
250 
251 	cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
252 
253 	if (name && (error = copyin(anamelen, &namelen, sizeof (namelen))))
254 		return (error);
255 	if ((error = getsock(p, sock, &fp)) != 0)
256 		return (error);
257 
258 	headfp = fp;
259 
260 	fdplock(fdp);
261 	error = falloc(p, &fp, &tmpfd);
262 	fdpunlock(fdp);
263 	if (error) {
264 		FRELE(headfp, p);
265 		return (error);
266 	}
267 
268 	nam = m_get(M_WAIT, MT_SONAME);
269 
270 	head = headfp->f_data;
271 	s = solock(head);
272 	if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
273 		error = EINVAL;
274 		goto out;
275 	}
276 	if ((headfp->f_flag & FNONBLOCK) && head->so_qlen == 0) {
277 		if (head->so_state & SS_CANTRCVMORE)
278 			error = ECONNABORTED;
279 		else
280 			error = EWOULDBLOCK;
281 		goto out;
282 	}
283 	while (head->so_qlen == 0 && head->so_error == 0) {
284 		if (head->so_state & SS_CANTRCVMORE) {
285 			head->so_error = ECONNABORTED;
286 			break;
287 		}
288 		error = sosleep_nsec(head, &head->so_timeo, PSOCK | PCATCH,
289 		    "netcon", INFSLP);
290 		if (error)
291 			goto out;
292 	}
293 	if (head->so_error) {
294 		error = head->so_error;
295 		head->so_error = 0;
296 		goto out;
297 	}
298 
299 	/*
300 	 * Do not sleep after we have taken the socket out of the queue.
301 	 */
302 	so = TAILQ_FIRST(&head->so_q);
303 	if (soqremque(so, 1) == 0)
304 		panic("accept");
305 
306 	/* Figure out whether the new socket should be non-blocking. */
307 	nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
308 	    : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
309 
310 	/* connection has been removed from the listen queue */
311 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
312 
313 	fp->f_type = DTYPE_SOCKET;
314 	fp->f_flag = FREAD | FWRITE | nflag;
315 	fp->f_ops = &socketops;
316 	fp->f_data = so;
317 	error = soaccept(so, nam);
318 	if (!error && name != NULL)
319 		error = copyaddrout(p, nam, name, namelen, anamelen);
320 out:
321 	if (!error) {
322 		sounlock(head, s);
323 		fdplock(fdp);
324 		fdinsert(fdp, tmpfd, cloexec, fp);
325 		fdpunlock(fdp);
326 		FRELE(fp, p);
327 		*retval = tmpfd;
328 	} else {
329 		sounlock(head, s);
330 		fdplock(fdp);
331 		fdremove(fdp, tmpfd);
332 		fdpunlock(fdp);
333 		closef(fp, p);
334 	}
335 
336 	m_freem(nam);
337 	FRELE(headfp, p);
338 	return (error);
339 }
340 
341 int
342 sys_connect(struct proc *p, void *v, register_t *retval)
343 {
344 	struct sys_connect_args /* {
345 		syscallarg(int) s;
346 		syscallarg(const struct sockaddr *) name;
347 		syscallarg(socklen_t) namelen;
348 	} */ *uap = v;
349 	struct file *fp;
350 	struct socket *so;
351 	struct mbuf *nam = NULL;
352 	int error, s, interrupted = 0;
353 
354 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
355 		return (error);
356 	so = fp->f_data;
357 	s = solock(so);
358 	if (so->so_state & SS_ISCONNECTING) {
359 		error = EALREADY;
360 		goto out;
361 	}
362 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
363 	    MT_SONAME);
364 	if (error)
365 		goto out;
366 	error = pledge_socket(p, so->so_proto->pr_domain->dom_family,
367 	    so->so_state);
368 	if (error)
369 		goto out;
370 #ifdef KTRACE
371 	if (KTRPOINT(p, KTR_STRUCT))
372 		ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
373 #endif
374 
375 	if (isdnssocket(so)) {
376 		u_int namelen = nam->m_len;
377 		error = dns_portcheck(p, so, mtod(nam, void *), &namelen);
378 		if (error)
379 			goto out;
380 		nam->m_len = namelen;
381 	}
382 
383 	error = soconnect(so, nam);
384 	if (error)
385 		goto bad;
386 	if ((fp->f_flag & FNONBLOCK) && (so->so_state & SS_ISCONNECTING)) {
387 		error = EINPROGRESS;
388 		goto out;
389 	}
390 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
391 		error = sosleep_nsec(so, &so->so_timeo, PSOCK | PCATCH,
392 		    "netcon2", INFSLP);
393 		if (error) {
394 			if (error == EINTR || error == ERESTART)
395 				interrupted = 1;
396 			break;
397 		}
398 	}
399 	if (error == 0) {
400 		error = so->so_error;
401 		so->so_error = 0;
402 	}
403 bad:
404 	if (!interrupted)
405 		so->so_state &= ~SS_ISCONNECTING;
406 out:
407 	sounlock(so, s);
408 	FRELE(fp, p);
409 	m_freem(nam);
410 	if (error == ERESTART)
411 		error = EINTR;
412 	return (error);
413 }
414 
415 int
416 sys_socketpair(struct proc *p, void *v, register_t *retval)
417 {
418 	struct sys_socketpair_args /* {
419 		syscallarg(int) domain;
420 		syscallarg(int) type;
421 		syscallarg(int) protocol;
422 		syscallarg(int *) rsv;
423 	} */ *uap = v;
424 	struct filedesc *fdp = p->p_fd;
425 	struct file *fp1 = NULL, *fp2 = NULL;
426 	struct socket *so1, *so2;
427 	int type, cloexec, nonblock, fflag, error, sv[2];
428 
429 	type  = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
430 	cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
431 	nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
432 	fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
433 
434 	error = socreate(SCARG(uap, domain), &so1, type, SCARG(uap, protocol));
435 	if (error)
436 		return (error);
437 	error = socreate(SCARG(uap, domain), &so2, type, SCARG(uap, protocol));
438 	if (error)
439 		goto free1;
440 
441 	error = soconnect2(so1, so2);
442 	if (error != 0)
443 		goto free2;
444 
445 	if ((SCARG(uap, type) & SOCK_TYPE_MASK) == SOCK_DGRAM) {
446 		/*
447 		 * Datagram socket connection is asymmetric.
448 		 */
449 		error = soconnect2(so2, so1);
450 		if (error != 0)
451 			goto free2;
452 	}
453 	fdplock(fdp);
454 	if ((error = falloc(p, &fp1, &sv[0])) != 0)
455 		goto free3;
456 	fp1->f_flag = fflag;
457 	fp1->f_type = DTYPE_SOCKET;
458 	fp1->f_ops = &socketops;
459 	fp1->f_data = so1;
460 	if ((error = falloc(p, &fp2, &sv[1])) != 0)
461 		goto free4;
462 	fp2->f_flag = fflag;
463 	fp2->f_type = DTYPE_SOCKET;
464 	fp2->f_ops = &socketops;
465 	fp2->f_data = so2;
466 	error = copyout(sv, SCARG(uap, rsv), 2 * sizeof (int));
467 	if (error == 0) {
468 		fdinsert(fdp, sv[0], cloexec, fp1);
469 		fdinsert(fdp, sv[1], cloexec, fp2);
470 		fdpunlock(fdp);
471 #ifdef KTRACE
472 		if (KTRPOINT(p, KTR_STRUCT))
473 			ktrfds(p, sv, 2);
474 #endif
475 		FRELE(fp1, p);
476 		FRELE(fp2, p);
477 		return (0);
478 	}
479 	fdremove(fdp, sv[1]);
480 free4:
481 	fdremove(fdp, sv[0]);
482 free3:
483 	fdpunlock(fdp);
484 
485 	if (fp2 != NULL) {
486 		closef(fp2, p);
487 		so2 = NULL;
488 	}
489 	if (fp1 != NULL) {
490 		closef(fp1, p);
491 		so1 = NULL;
492 	}
493 free2:
494 	if (so2 != NULL)
495 		(void)soclose(so2, 0);
496 free1:
497 	if (so1 != NULL)
498 		(void)soclose(so1, 0);
499 	return (error);
500 }
501 
502 int
503 sys_sendto(struct proc *p, void *v, register_t *retval)
504 {
505 	struct sys_sendto_args /* {
506 		syscallarg(int) s;
507 		syscallarg(const void *) buf;
508 		syscallarg(size_t) len;
509 		syscallarg(int) flags;
510 		syscallarg(const struct sockaddr *) to;
511 		syscallarg(socklen_t) tolen;
512 	} */ *uap = v;
513 	struct msghdr msg;
514 	struct iovec aiov;
515 
516 	msg.msg_name = (caddr_t)SCARG(uap, to);
517 	msg.msg_namelen = SCARG(uap, tolen);
518 	msg.msg_iov = &aiov;
519 	msg.msg_iovlen = 1;
520 	msg.msg_control = 0;
521 	msg.msg_flags = 0;
522 	aiov.iov_base = (char *)SCARG(uap, buf);
523 	aiov.iov_len = SCARG(uap, len);
524 	return (sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval));
525 }
526 
527 int
528 sys_sendmsg(struct proc *p, void *v, register_t *retval)
529 {
530 	struct sys_sendmsg_args /* {
531 		syscallarg(int) s;
532 		syscallarg(const struct msghdr *) msg;
533 		syscallarg(int) flags;
534 	} */ *uap = v;
535 	struct msghdr msg;
536 	struct iovec aiov[UIO_SMALLIOV], *iov;
537 	int error;
538 
539 	error = copyin(SCARG(uap, msg), &msg, sizeof (msg));
540 	if (error)
541 		return (error);
542 #ifdef KTRACE
543 	if (KTRPOINT(p, KTR_STRUCT))
544 		ktrmsghdr(p, &msg);
545 #endif
546 
547 	if (msg.msg_iovlen > IOV_MAX)
548 		return (EMSGSIZE);
549 	if (msg.msg_iovlen > UIO_SMALLIOV)
550 		iov = mallocarray(msg.msg_iovlen, sizeof(struct iovec),
551 		    M_IOV, M_WAITOK);
552 	else
553 		iov = aiov;
554 	if (msg.msg_iovlen &&
555 	    (error = copyin(msg.msg_iov, iov,
556 		    msg.msg_iovlen * sizeof (struct iovec))))
557 		goto done;
558 #ifdef KTRACE
559 	if (msg.msg_iovlen && KTRPOINT(p, KTR_STRUCT))
560 		ktriovec(p, iov, msg.msg_iovlen);
561 #endif
562 	msg.msg_iov = iov;
563 	msg.msg_flags = 0;
564 	error = sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
565 done:
566 	if (iov != aiov)
567 		free(iov, M_IOV, sizeof(struct iovec) * msg.msg_iovlen);
568 	return (error);
569 }
570 
571 int
572 sendit(struct proc *p, int s, struct msghdr *mp, int flags, register_t *retsize)
573 {
574 	struct file *fp;
575 	struct uio auio;
576 	struct iovec *iov;
577 	int i;
578 	struct mbuf *to, *control;
579 	struct socket *so;
580 	size_t len;
581 	int error;
582 #ifdef KTRACE
583 	struct iovec *ktriov = NULL;
584 	int iovlen = 0;
585 #endif
586 
587 	to = NULL;
588 
589 	if ((error = getsock(p, s, &fp)) != 0)
590 		return (error);
591 	so = fp->f_data;
592 	if (fp->f_flag & FNONBLOCK)
593 		flags |= MSG_DONTWAIT;
594 
595 	error = pledge_sendit(p, mp->msg_name);
596 	if (error)
597 		goto bad;
598 
599 	auio.uio_iov = mp->msg_iov;
600 	auio.uio_iovcnt = mp->msg_iovlen;
601 	auio.uio_segflg = UIO_USERSPACE;
602 	auio.uio_rw = UIO_WRITE;
603 	auio.uio_procp = p;
604 	auio.uio_offset = 0;			/* XXX */
605 	auio.uio_resid = 0;
606 	iov = mp->msg_iov;
607 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
608 		/* Don't allow sum > SSIZE_MAX */
609 		if (iov->iov_len > SSIZE_MAX ||
610 		    (auio.uio_resid += iov->iov_len) > SSIZE_MAX) {
611 			error = EINVAL;
612 			goto bad;
613 		}
614 	}
615 	if (mp->msg_name) {
616 		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
617 		    MT_SONAME);
618 		if (error)
619 			goto bad;
620 		if (isdnssocket(so)) {
621 			u_int namelen = mp->msg_namelen;
622 			error = dns_portcheck(p, so, mtod(to, caddr_t),
623 			    &namelen);
624 			if (error)
625 				goto bad;
626 			mp->msg_namelen = namelen;
627 		}
628 #ifdef KTRACE
629 		if (KTRPOINT(p, KTR_STRUCT))
630 			ktrsockaddr(p, mtod(to, caddr_t), mp->msg_namelen);
631 #endif
632 	}
633 	if (mp->msg_control) {
634 		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
635 			error = EINVAL;
636 			goto bad;
637 		}
638 		error = sockargs(&control, mp->msg_control,
639 		    mp->msg_controllen, MT_CONTROL);
640 		if (error)
641 			goto bad;
642 #ifdef KTRACE
643 		if (KTRPOINT(p, KTR_STRUCT) && mp->msg_controllen)
644 			ktrcmsghdr(p, mtod(control, char *),
645 			    mp->msg_controllen);
646 #endif
647 	} else
648 		control = NULL;
649 #ifdef KTRACE
650 	if (KTRPOINT(p, KTR_GENIO)) {
651 		ktriov = mallocarray(auio.uio_iovcnt, sizeof(struct iovec),
652 		    M_TEMP, M_WAITOK);
653 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
654 
655 		memcpy(ktriov, auio.uio_iov, iovlen);
656 	}
657 #endif
658 	len = auio.uio_resid;
659 	error = sosend(so, to, &auio, NULL, control, flags);
660 	if (error) {
661 		if (auio.uio_resid != len && (error == ERESTART ||
662 		    error == EINTR || error == EWOULDBLOCK))
663 			error = 0;
664 		if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
665 			KERNEL_LOCK();
666 			ptsignal(p, SIGPIPE, STHREAD);
667 			KERNEL_UNLOCK();
668 		}
669 	}
670 	if (error == 0) {
671 		*retsize = len - auio.uio_resid;
672 		mtx_enter(&fp->f_mtx);
673 		fp->f_wxfer++;
674 		fp->f_wbytes += *retsize;
675 		mtx_leave(&fp->f_mtx);
676 	}
677 #ifdef KTRACE
678 	if (ktriov != NULL) {
679 		if (error == 0)
680 			ktrgenio(p, s, UIO_WRITE, ktriov, *retsize);
681 		free(ktriov, M_TEMP, iovlen);
682 	}
683 #endif
684 bad:
685 	FRELE(fp, p);
686 	m_freem(to);
687 	return (error);
688 }
689 
690 int
691 sys_recvfrom(struct proc *p, void *v, register_t *retval)
692 {
693 	struct sys_recvfrom_args /* {
694 		syscallarg(int) s;
695 		syscallarg(void *) buf;
696 		syscallarg(size_t) len;
697 		syscallarg(int) flags;
698 		syscallarg(struct sockaddr *) from;
699 		syscallarg(socklen_t *) fromlenaddr;
700 	} */ *uap = v;
701 	struct msghdr msg;
702 	struct iovec aiov;
703 	int error;
704 
705 	if (SCARG(uap, fromlenaddr)) {
706 		error = copyin(SCARG(uap, fromlenaddr),
707 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
708 		if (error)
709 			return (error);
710 	} else
711 		msg.msg_namelen = 0;
712 	msg.msg_name = (caddr_t)SCARG(uap, from);
713 	msg.msg_iov = &aiov;
714 	msg.msg_iovlen = 1;
715 	aiov.iov_base = SCARG(uap, buf);
716 	aiov.iov_len = SCARG(uap, len);
717 	msg.msg_control = 0;
718 	msg.msg_flags = SCARG(uap, flags);
719 	return (recvit(p, SCARG(uap, s), &msg,
720 	    (caddr_t)SCARG(uap, fromlenaddr), retval));
721 }
722 
723 int
724 sys_recvmsg(struct proc *p, void *v, register_t *retval)
725 {
726 	struct sys_recvmsg_args /* {
727 		syscallarg(int) s;
728 		syscallarg(struct msghdr *) msg;
729 		syscallarg(int) flags;
730 	} */ *uap = v;
731 	struct msghdr msg;
732 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
733 	int error;
734 
735 	error = copyin(SCARG(uap, msg), &msg, sizeof (msg));
736 	if (error)
737 		return (error);
738 
739 	if (msg.msg_iovlen > IOV_MAX)
740 		return (EMSGSIZE);
741 	if (msg.msg_iovlen > UIO_SMALLIOV)
742 		iov = mallocarray(msg.msg_iovlen, sizeof(struct iovec),
743 		    M_IOV, M_WAITOK);
744 	else
745 		iov = aiov;
746 	msg.msg_flags = SCARG(uap, flags);
747 	if (msg.msg_iovlen > 0) {
748 		error = copyin(msg.msg_iov, iov,
749 		    msg.msg_iovlen * sizeof(struct iovec));
750 		if (error)
751 			goto done;
752 	}
753 	uiov = msg.msg_iov;
754 	msg.msg_iov = iov;
755 	if ((error = recvit(p, SCARG(uap, s), &msg, NULL, retval)) == 0) {
756 		msg.msg_iov = uiov;
757 #ifdef KTRACE
758 		if (KTRPOINT(p, KTR_STRUCT)) {
759 			ktrmsghdr(p, &msg);
760 			if (msg.msg_iovlen)
761 				ktriovec(p, iov, msg.msg_iovlen);
762 		}
763 #endif
764 		error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
765 	}
766 done:
767 	if (iov != aiov)
768 		free(iov, M_IOV, sizeof(struct iovec) * msg.msg_iovlen);
769 	return (error);
770 }
771 
772 int
773 recvit(struct proc *p, int s, struct msghdr *mp, caddr_t namelenp,
774     register_t *retsize)
775 {
776 	struct file *fp;
777 	struct uio auio;
778 	struct iovec *iov;
779 	int i;
780 	size_t len;
781 	int error;
782 	struct mbuf *from = NULL, *control = NULL;
783 #ifdef KTRACE
784 	struct iovec *ktriov = NULL;
785 	int iovlen = 0, kmsgflags;
786 #endif
787 
788 	if ((error = getsock(p, s, &fp)) != 0)
789 		return (error);
790 
791 	auio.uio_iov = mp->msg_iov;
792 	auio.uio_iovcnt = mp->msg_iovlen;
793 	auio.uio_segflg = UIO_USERSPACE;
794 	auio.uio_rw = UIO_READ;
795 	auio.uio_procp = p;
796 	auio.uio_offset = 0;			/* XXX */
797 	auio.uio_resid = 0;
798 	iov = mp->msg_iov;
799 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
800 		/* Don't allow sum > SSIZE_MAX */
801 		if (iov->iov_len > SSIZE_MAX ||
802 		    (auio.uio_resid += iov->iov_len) > SSIZE_MAX) {
803 			error = EINVAL;
804 			goto out;
805 		}
806 	}
807 #ifdef KTRACE
808 	if (KTRPOINT(p, KTR_GENIO)) {
809 		ktriov = mallocarray(auio.uio_iovcnt, sizeof(struct iovec),
810 		    M_TEMP, M_WAITOK);
811 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
812 
813 		memcpy(ktriov, auio.uio_iov, iovlen);
814 	}
815 	kmsgflags = mp->msg_flags;
816 #endif
817 	len = auio.uio_resid;
818 	if (fp->f_flag & FNONBLOCK)
819 		mp->msg_flags |= MSG_DONTWAIT;
820 	error = soreceive(fp->f_data, &from, &auio, NULL,
821 			  mp->msg_control ? &control : NULL,
822 			  &mp->msg_flags,
823 			  mp->msg_control ? mp->msg_controllen : 0);
824 	if (error) {
825 		if (auio.uio_resid != len && (error == ERESTART ||
826 		    error == EINTR || error == EWOULDBLOCK))
827 			error = 0;
828 	}
829 #ifdef KTRACE
830 	if (ktriov != NULL) {
831 		if (error == 0)
832 			ktrgenio(p, s, UIO_READ, ktriov, len - auio.uio_resid);
833 		free(ktriov, M_TEMP, iovlen);
834 	}
835 #endif
836 	if (error)
837 		goto out;
838 	*retsize = len - auio.uio_resid;
839 	if (mp->msg_name) {
840 		socklen_t alen;
841 
842 		if (from == NULL)
843 			alen = 0;
844 		else {
845 			alen = from->m_len;
846 			error = copyout(mtod(from, caddr_t), mp->msg_name,
847 			    MIN(alen, mp->msg_namelen));
848 			if (error)
849 				goto out;
850 #ifdef KTRACE
851 			if (KTRPOINT(p, KTR_STRUCT))
852 				ktrsockaddr(p, mtod(from, caddr_t), alen);
853 #endif
854 		}
855 		mp->msg_namelen = alen;
856 		if (namelenp &&
857 		    (error = copyout(&alen, namelenp, sizeof(alen)))) {
858 			goto out;
859 		}
860 	}
861 	if (mp->msg_control) {
862 		len = mp->msg_controllen;
863 		if (len <= 0 || control == NULL)
864 			len = 0;
865 		else {
866 			struct mbuf *m = control;
867 			caddr_t cp = mp->msg_control;
868 
869 			do {
870 				i = m->m_len;
871 				if (len < i) {
872 					mp->msg_flags |= MSG_CTRUNC;
873 					i = len;
874 				}
875 				error = copyout(mtod(m, caddr_t), cp, i);
876 #ifdef KTRACE
877 				if (KTRPOINT(p, KTR_STRUCT) && error == 0 && i) {
878 					/* msg_flags potentially incorrect */
879 					int rmsgflags = mp->msg_flags;
880 
881 					mp->msg_flags = kmsgflags;
882 					ktrcmsghdr(p, mtod(m, char *), i);
883 					mp->msg_flags = rmsgflags;
884 				}
885 #endif
886 				if (m->m_next)
887 					i = ALIGN(i);
888 				cp += i;
889 				len -= i;
890 				if (error != 0 || len <= 0)
891 					break;
892 			} while ((m = m->m_next) != NULL);
893 			len = cp - (caddr_t)mp->msg_control;
894 		}
895 		mp->msg_controllen = len;
896 	}
897 	if (!error) {
898 		mtx_enter(&fp->f_mtx);
899 		fp->f_rxfer++;
900 		fp->f_rbytes += *retsize;
901 		mtx_leave(&fp->f_mtx);
902 	}
903 out:
904 	FRELE(fp, p);
905 	m_freem(from);
906 	m_freem(control);
907 	return (error);
908 }
909 
910 int
911 sys_shutdown(struct proc *p, void *v, register_t *retval)
912 {
913 	struct sys_shutdown_args /* {
914 		syscallarg(int) s;
915 		syscallarg(int) how;
916 	} */ *uap = v;
917 	struct file *fp;
918 	int error;
919 
920 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
921 		return (error);
922 	error = soshutdown(fp->f_data, SCARG(uap, how));
923 	FRELE(fp, p);
924 	return (error);
925 }
926 
927 int
928 sys_setsockopt(struct proc *p, void *v, register_t *retval)
929 {
930 	struct sys_setsockopt_args /* {
931 		syscallarg(int) s;
932 		syscallarg(int) level;
933 		syscallarg(int) name;
934 		syscallarg(const void *) val;
935 		syscallarg(socklen_t) valsize;
936 	} */ *uap = v;
937 	struct file *fp;
938 	struct mbuf *m = NULL;
939 	struct socket *so;
940 	int s, error;
941 
942 
943 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
944 		return (error);
945 	error = pledge_sockopt(p, 1, SCARG(uap, level), SCARG(uap, name));
946 	if (error)
947 		goto bad;
948 	if (SCARG(uap, valsize) > MCLBYTES) {
949 		error = EINVAL;
950 		goto bad;
951 	}
952 	if (SCARG(uap, val)) {
953 		m = m_get(M_WAIT, MT_SOOPTS);
954 		if (SCARG(uap, valsize) > MLEN) {
955 			MCLGET(m, M_DONTWAIT);
956 			if ((m->m_flags & M_EXT) == 0) {
957 				error = ENOBUFS;
958 				goto bad;
959 			}
960 		}
961 		if (m == NULL) {
962 			error = ENOBUFS;
963 			goto bad;
964 		}
965 		error = copyin(SCARG(uap, val), mtod(m, caddr_t),
966 		    SCARG(uap, valsize));
967 		if (error) {
968 			goto bad;
969 		}
970 		m->m_len = SCARG(uap, valsize);
971 	}
972 	so = fp->f_data;
973 	s = solock(so);
974 	error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m);
975 	sounlock(so, s);
976 bad:
977 	m_freem(m);
978 	FRELE(fp, p);
979 	return (error);
980 }
981 
982 int
983 sys_getsockopt(struct proc *p, void *v, register_t *retval)
984 {
985 	struct sys_getsockopt_args /* {
986 		syscallarg(int) s;
987 		syscallarg(int) level;
988 		syscallarg(int) name;
989 		syscallarg(void *) val;
990 		syscallarg(socklen_t *) avalsize;
991 	} */ *uap = v;
992 	struct file *fp;
993 	struct mbuf *m = NULL;
994 	socklen_t valsize;
995 	struct socket *so;
996 	int s, error;
997 
998 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
999 		return (error);
1000 	error = pledge_sockopt(p, 0, SCARG(uap, level), SCARG(uap, name));
1001 	if (error)
1002 		goto out;
1003 	if (SCARG(uap, val)) {
1004 		error = copyin(SCARG(uap, avalsize),
1005 		    &valsize, sizeof (valsize));
1006 		if (error)
1007 			goto out;
1008 	} else
1009 		valsize = 0;
1010 	m = m_get(M_WAIT, MT_SOOPTS);
1011 	so = fp->f_data;
1012 	s = solock(so);
1013 	error = sogetopt(so, SCARG(uap, level), SCARG(uap, name), m);
1014 	sounlock(so, s);
1015 	if (error == 0 && SCARG(uap, val) && valsize && m != NULL) {
1016 		if (valsize > m->m_len)
1017 			valsize = m->m_len;
1018 		error = copyout(mtod(m, caddr_t), SCARG(uap, val), valsize);
1019 		if (error == 0)
1020 			error = copyout(&valsize,
1021 			    SCARG(uap, avalsize), sizeof (valsize));
1022 	}
1023 	m_free(m);
1024 out:
1025 	FRELE(fp, p);
1026 	return (error);
1027 }
1028 
1029 /*
1030  * Get socket name.
1031  */
1032 int
1033 sys_getsockname(struct proc *p, void *v, register_t *retval)
1034 {
1035 	struct sys_getsockname_args /* {
1036 		syscallarg(int) fdes;
1037 		syscallarg(struct sockaddr *) asa;
1038 		syscallarg(socklen_t *) alen;
1039 	} */ *uap = v;
1040 	struct file *fp;
1041 	struct socket *so;
1042 	struct mbuf *m = NULL;
1043 	socklen_t len;
1044 	int error, s;
1045 
1046 	if ((error = getsock(p, SCARG(uap, fdes), &fp)) != 0)
1047 		return (error);
1048 	error = copyin(SCARG(uap, alen), &len, sizeof (len));
1049 	if (error)
1050 		goto bad;
1051 	so = fp->f_data;
1052 	error = pledge_socket(p, -1, so->so_state);
1053 	if (error)
1054 		goto bad;
1055 	m = m_getclr(M_WAIT, MT_SONAME);
1056 	s = solock(so);
1057 	error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0, p);
1058 	sounlock(so, s);
1059 	if (error)
1060 		goto bad;
1061 	error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
1062 bad:
1063 	FRELE(fp, p);
1064 	m_freem(m);
1065 	return (error);
1066 }
1067 
1068 /*
1069  * Get name of peer for connected socket.
1070  */
1071 int
1072 sys_getpeername(struct proc *p, void *v, register_t *retval)
1073 {
1074 	struct sys_getpeername_args /* {
1075 		syscallarg(int) fdes;
1076 		syscallarg(struct sockaddr *) asa;
1077 		syscallarg(socklen_t *) alen;
1078 	} */ *uap = v;
1079 	struct file *fp;
1080 	struct socket *so;
1081 	struct mbuf *m = NULL;
1082 	socklen_t len;
1083 	int error, s;
1084 
1085 	if ((error = getsock(p, SCARG(uap, fdes), &fp)) != 0)
1086 		return (error);
1087 	so = fp->f_data;
1088 	error = pledge_socket(p, -1, so->so_state);
1089 	if (error)
1090 		goto bad;
1091 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1092 		error = ENOTCONN;
1093 		goto bad;
1094 	}
1095 	error = copyin(SCARG(uap, alen), &len, sizeof (len));
1096 	if (error)
1097 		goto bad;
1098 	m = m_getclr(M_WAIT, MT_SONAME);
1099 	s = solock(so);
1100 	error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0, p);
1101 	sounlock(so, s);
1102 	if (error)
1103 		goto bad;
1104 	error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
1105 bad:
1106 	FRELE(fp, p);
1107 	m_freem(m);
1108 	return (error);
1109 }
1110 
1111 int
1112 sockargs(struct mbuf **mp, const void *buf, size_t buflen, int type)
1113 {
1114 	struct sockaddr *sa;
1115 	struct mbuf *m;
1116 	int error;
1117 
1118 	/*
1119 	 * We can't allow socket names > UCHAR_MAX in length, since that
1120 	 * will overflow sa_len. Also, control data more than MCLBYTES in
1121 	 * length is just too much.
1122 	 * Memory for sa_len and sa_family must exist.
1123 	 */
1124 	if ((buflen > (type == MT_SONAME ? UCHAR_MAX : MCLBYTES)) ||
1125 	    (type == MT_SONAME && buflen < offsetof(struct sockaddr, sa_data)))
1126 		return (EINVAL);
1127 
1128 	/* Allocate an mbuf to hold the arguments. */
1129 	m = m_get(M_WAIT, type);
1130 	if (buflen > MLEN) {
1131 		MCLGET(m, M_WAITOK);
1132 		if ((m->m_flags & M_EXT) == 0) {
1133 			m_free(m);
1134 			return ENOBUFS;
1135 		}
1136 	}
1137 	m->m_len = buflen;
1138 	error = copyin(buf, mtod(m, caddr_t), buflen);
1139 	if (error) {
1140 		(void) m_free(m);
1141 		return (error);
1142 	}
1143 	*mp = m;
1144 	if (type == MT_SONAME) {
1145 		sa = mtod(m, struct sockaddr *);
1146 		sa->sa_len = buflen;
1147 	}
1148 	return (0);
1149 }
1150 
1151 int
1152 getsock(struct proc *p, int fdes, struct file **fpp)
1153 {
1154 	struct file *fp;
1155 
1156 	fp = fd_getfile(p->p_fd, fdes);
1157 	if (fp == NULL)
1158 		return (EBADF);
1159 	if (fp->f_type != DTYPE_SOCKET) {
1160 		FRELE(fp, p);
1161 		return (ENOTSOCK);
1162 	}
1163 	*fpp = fp;
1164 
1165 	return (0);
1166 }
1167 
1168 int
1169 sys_setrtable(struct proc *p, void *v, register_t *retval)
1170 {
1171 	struct sys_setrtable_args /* {
1172 		syscallarg(int) rtableid;
1173 	} */ *uap = v;
1174 	u_int ps_rtableid = p->p_p->ps_rtableid;
1175 	int rtableid, error;
1176 
1177 	rtableid = SCARG(uap, rtableid);
1178 
1179 	if (ps_rtableid == rtableid)
1180 		return (0);
1181 	if (ps_rtableid != 0 && (error = suser(p)) != 0)
1182 		return (error);
1183 	if (rtableid < 0 || !rtable_exists((u_int)rtableid))
1184 		return (EINVAL);
1185 
1186 	p->p_p->ps_rtableid = (u_int)rtableid;
1187 	return (0);
1188 }
1189 
1190 int
1191 sys_getrtable(struct proc *p, void *v, register_t *retval)
1192 {
1193 	*retval = (int)p->p_p->ps_rtableid;
1194 	return (0);
1195 }
1196 
1197 int
1198 copyaddrout(struct proc *p, struct mbuf *name, struct sockaddr *sa,
1199     socklen_t buflen, socklen_t *outlen)
1200 {
1201 	int error;
1202 	socklen_t namelen = name->m_len;
1203 
1204 	/* SHOULD COPY OUT A CHAIN HERE */
1205 	error = copyout(mtod(name, caddr_t), sa, MIN(buflen, namelen));
1206 	if (error == 0) {
1207 #ifdef KTRACE
1208 		if (KTRPOINT(p, KTR_STRUCT))
1209 			ktrsockaddr(p, mtod(name, caddr_t), namelen);
1210 #endif
1211 		error = copyout(&namelen, outlen, sizeof(*outlen));
1212 	}
1213 
1214 	return (error);
1215 }
1216