xref: /openbsd/sys/kern/uipc_syscalls.c (revision 84245c07)
1 /*	$OpenBSD: uipc_syscalls.c,v 1.195 2022/06/06 14:45:41 claudio Exp $	*/
2 /*	$NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1989, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/filedesc.h>
38 #include <sys/proc.h>
39 #include <sys/fcntl.h>
40 #include <sys/file.h>
41 #include <sys/ioctl.h>
42 #include <sys/malloc.h>
43 #include <sys/event.h>
44 #include <sys/mbuf.h>
45 #include <sys/protosw.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/signalvar.h>
49 #include <sys/pledge.h>
50 #include <sys/unpcb.h>
51 #include <sys/un.h>
52 #ifdef KTRACE
53 #include <sys/ktrace.h>
54 #endif
55 
56 #include <sys/mount.h>
57 #include <sys/syscallargs.h>
58 
59 #include <sys/domain.h>
60 #include <netinet/in.h>
61 #include <net/route.h>
62 
63 int	copyaddrout(struct proc *, struct mbuf *, struct sockaddr *, socklen_t,
64 	    socklen_t *);
65 
66 int
67 sys_socket(struct proc *p, void *v, register_t *retval)
68 {
69 	struct sys_socket_args /* {
70 		syscallarg(int) domain;
71 		syscallarg(int) type;
72 		syscallarg(int) protocol;
73 	} */ *uap = v;
74 	struct filedesc *fdp = p->p_fd;
75 	struct socket *so;
76 	struct file *fp;
77 	int type = SCARG(uap, type);
78 	int domain = SCARG(uap, domain);
79 	int fd, cloexec, nonblock, fflag, error;
80 	unsigned int ss = 0;
81 
82 	if ((type & SOCK_DNS) && !(domain == AF_INET || domain == AF_INET6))
83 		return (EINVAL);
84 
85 	if (ISSET(type, SOCK_DNS))
86 		ss |= SS_DNS;
87 	error = pledge_socket(p, domain, ss);
88 	if (error)
89 		return (error);
90 
91 	type &= ~(SOCK_CLOEXEC | SOCK_NONBLOCK | SOCK_DNS);
92 	cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
93 	nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
94 	fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
95 
96 	error = socreate(SCARG(uap, domain), &so, type, SCARG(uap, protocol));
97 	if (error)
98 		return (error);
99 
100 	fdplock(fdp);
101 	error = falloc(p, &fp, &fd);
102 	if (error) {
103 		fdpunlock(fdp);
104 		soclose(so, MSG_DONTWAIT);
105 	} else {
106 		fp->f_flag = fflag;
107 		fp->f_type = DTYPE_SOCKET;
108 		fp->f_ops = &socketops;
109 		so->so_state |= ss;
110 		fp->f_data = so;
111 		fdinsert(fdp, fd, cloexec, fp);
112 		fdpunlock(fdp);
113 		FRELE(fp, p);
114 		*retval = fd;
115 	}
116 	return (error);
117 }
118 
119 static inline int
120 isdnssocket(struct socket *so)
121 {
122 	return (so->so_state & SS_DNS);
123 }
124 
125 /* For SS_DNS sockets, only allow port DNS (port 53) */
126 static int
127 dns_portcheck(struct proc *p, struct socket *so, void *nam, size_t namelen)
128 {
129 	int error = EINVAL;
130 
131 	switch (so->so_proto->pr_domain->dom_family) {
132 	case AF_INET:
133 		if (namelen < sizeof(struct sockaddr_in))
134 			break;
135 		if (((struct sockaddr_in *)nam)->sin_port == htons(53))
136 			error = 0;
137 		break;
138 #ifdef INET6
139 	case AF_INET6:
140 		if (namelen < sizeof(struct sockaddr_in6))
141 			break;
142 		if (((struct sockaddr_in6 *)nam)->sin6_port == htons(53))
143 			error = 0;
144 #endif
145 	}
146 	if (error && p->p_p->ps_flags & PS_PLEDGE)
147 		return (pledge_fail(p, EPERM, PLEDGE_DNS));
148 	return error;
149 }
150 
151 int
152 sys_bind(struct proc *p, void *v, register_t *retval)
153 {
154 	struct sys_bind_args /* {
155 		syscallarg(int) s;
156 		syscallarg(const struct sockaddr *) name;
157 		syscallarg(socklen_t) namelen;
158 	} */ *uap = v;
159 	struct file *fp;
160 	struct mbuf *nam;
161 	struct socket *so;
162 	int error;
163 
164 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
165 		return (error);
166 	so = fp->f_data;
167 	error = pledge_socket(p, so->so_proto->pr_domain->dom_family,
168 	    so->so_state);
169 	if (error)
170 		goto out;
171 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
172 	    MT_SONAME);
173 	if (error)
174 		goto out;
175 #ifdef KTRACE
176 	if (KTRPOINT(p, KTR_STRUCT))
177 		ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
178 #endif
179 	solock(so);
180 	error = sobind(so, nam, p);
181 	sounlock(so);
182 	m_freem(nam);
183 out:
184 	FRELE(fp, p);
185 	return (error);
186 }
187 
188 int
189 sys_listen(struct proc *p, void *v, register_t *retval)
190 {
191 	struct sys_listen_args /* {
192 		syscallarg(int) s;
193 		syscallarg(int) backlog;
194 	} */ *uap = v;
195 	struct file *fp;
196 	struct socket *so;
197 	int error;
198 
199 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
200 		return (error);
201 	so = fp->f_data;
202 	solock(so);
203 	error = solisten(so, SCARG(uap, backlog));
204 	sounlock(so);
205 	FRELE(fp, p);
206 	return (error);
207 }
208 
209 int
210 sys_accept(struct proc *p, void *v, register_t *retval)
211 {
212 	struct sys_accept_args /* {
213 		syscallarg(int) s;
214 		syscallarg(struct sockaddr *) name;
215 		syscallarg(socklen_t *) anamelen;
216 	} */ *uap = v;
217 
218 	return (doaccept(p, SCARG(uap, s), SCARG(uap, name),
219 	    SCARG(uap, anamelen), SOCK_NONBLOCK_INHERIT, retval));
220 }
221 
222 int
223 sys_accept4(struct proc *p, void *v, register_t *retval)
224 {
225 	struct sys_accept4_args /* {
226 		syscallarg(int) s;
227 		syscallarg(struct sockaddr *) name;
228 		syscallarg(socklen_t *) anamelen;
229 		syscallarg(socklen_t *) int flags;
230 	} */ *uap = v;
231 
232 	if (SCARG(uap, flags) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
233 		return (EINVAL);
234 
235 	return (doaccept(p, SCARG(uap, s), SCARG(uap, name),
236 	    SCARG(uap, anamelen), SCARG(uap, flags), retval));
237 }
238 
239 int
240 doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen,
241     int flags, register_t *retval)
242 {
243 	struct filedesc *fdp = p->p_fd;
244 	struct file *fp, *headfp;
245 	struct mbuf *nam;
246 	socklen_t namelen;
247 	int error, tmpfd;
248 	struct socket *head, *so;
249 	int cloexec, nflag;
250 
251 	cloexec = (flags & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
252 
253 	if (name && (error = copyin(anamelen, &namelen, sizeof (namelen))))
254 		return (error);
255 	if ((error = getsock(p, sock, &fp)) != 0)
256 		return (error);
257 
258 	headfp = fp;
259 
260 	fdplock(fdp);
261 	error = falloc(p, &fp, &tmpfd);
262 	fdpunlock(fdp);
263 	if (error) {
264 		FRELE(headfp, p);
265 		return (error);
266 	}
267 
268 	nam = m_get(M_WAIT, MT_SONAME);
269 
270 	head = headfp->f_data;
271 	solock(head);
272 	if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
273 		error = EINVAL;
274 		goto out;
275 	}
276 	if ((headfp->f_flag & FNONBLOCK) && head->so_qlen == 0) {
277 		if (head->so_state & SS_CANTRCVMORE)
278 			error = ECONNABORTED;
279 		else
280 			error = EWOULDBLOCK;
281 		goto out;
282 	}
283 	while (head->so_qlen == 0 && head->so_error == 0) {
284 		if (head->so_state & SS_CANTRCVMORE) {
285 			head->so_error = ECONNABORTED;
286 			break;
287 		}
288 		error = sosleep_nsec(head, &head->so_timeo, PSOCK | PCATCH,
289 		    "netcon", INFSLP);
290 		if (error)
291 			goto out;
292 	}
293 	if (head->so_error) {
294 		error = head->so_error;
295 		head->so_error = 0;
296 		goto out;
297 	}
298 
299 	/*
300 	 * Do not sleep after we have taken the socket out of the queue.
301 	 */
302 	so = TAILQ_FIRST(&head->so_q);
303 	if (soqremque(so, 1) == 0)
304 		panic("accept");
305 
306 	/* Figure out whether the new socket should be non-blocking. */
307 	nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
308 	    : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
309 
310 	/* connection has been removed from the listen queue */
311 	KNOTE(&head->so_rcv.sb_sel.si_note, 0);
312 
313 	fp->f_type = DTYPE_SOCKET;
314 	fp->f_flag = FREAD | FWRITE | nflag;
315 	fp->f_ops = &socketops;
316 	fp->f_data = so;
317 	error = soaccept(so, nam);
318 out:
319 	sounlock(head);
320 	if (!error && name != NULL)
321 		error = copyaddrout(p, nam, name, namelen, anamelen);
322 	if (!error) {
323 		fdplock(fdp);
324 		fdinsert(fdp, tmpfd, cloexec, fp);
325 		fdpunlock(fdp);
326 		FRELE(fp, p);
327 		*retval = tmpfd;
328 	} else {
329 		fdplock(fdp);
330 		fdremove(fdp, tmpfd);
331 		fdpunlock(fdp);
332 		closef(fp, p);
333 	}
334 
335 	m_freem(nam);
336 	FRELE(headfp, p);
337 	return (error);
338 }
339 
340 int
341 sys_connect(struct proc *p, void *v, register_t *retval)
342 {
343 	struct sys_connect_args /* {
344 		syscallarg(int) s;
345 		syscallarg(const struct sockaddr *) name;
346 		syscallarg(socklen_t) namelen;
347 	} */ *uap = v;
348 	struct file *fp;
349 	struct socket *so;
350 	struct mbuf *nam;
351 	int error, interrupted = 0;
352 
353 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
354 		return (error);
355 	so = fp->f_data;
356 	error = pledge_socket(p, so->so_proto->pr_domain->dom_family,
357 	    so->so_state);
358 	if (error)
359 		goto out;
360 	error = sockargs(&nam, SCARG(uap, name), SCARG(uap, namelen),
361 	    MT_SONAME);
362 	if (error)
363 		goto out;
364 #ifdef KTRACE
365 	if (KTRPOINT(p, KTR_STRUCT))
366 		ktrsockaddr(p, mtod(nam, caddr_t), SCARG(uap, namelen));
367 #endif
368 	solock(so);
369 	if (isdnssocket(so)) {
370 		error = dns_portcheck(p, so, mtod(nam, void *), nam->m_len);
371 		if (error)
372 			goto unlock;
373 	}
374 	if (so->so_state & SS_ISCONNECTING) {
375 		error = EALREADY;
376 		goto unlock;
377 	}
378 	error = soconnect(so, nam);
379 	if (error)
380 		goto bad;
381 	if ((fp->f_flag & FNONBLOCK) && (so->so_state & SS_ISCONNECTING)) {
382 		error = EINPROGRESS;
383 		goto unlock;
384 	}
385 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
386 		error = sosleep_nsec(so, &so->so_timeo, PSOCK | PCATCH,
387 		    "netcon2", INFSLP);
388 		if (error) {
389 			if (error == EINTR || error == ERESTART)
390 				interrupted = 1;
391 			break;
392 		}
393 	}
394 	if (error == 0) {
395 		error = so->so_error;
396 		so->so_error = 0;
397 	}
398 bad:
399 	if (!interrupted)
400 		so->so_state &= ~SS_ISCONNECTING;
401 unlock:
402 	sounlock(so);
403 	m_freem(nam);
404 out:
405 	FRELE(fp, p);
406 	if (error == ERESTART)
407 		error = EINTR;
408 	return (error);
409 }
410 
411 int
412 sys_socketpair(struct proc *p, void *v, register_t *retval)
413 {
414 	struct sys_socketpair_args /* {
415 		syscallarg(int) domain;
416 		syscallarg(int) type;
417 		syscallarg(int) protocol;
418 		syscallarg(int *) rsv;
419 	} */ *uap = v;
420 	struct filedesc *fdp = p->p_fd;
421 	struct file *fp1 = NULL, *fp2 = NULL;
422 	struct socket *so1, *so2;
423 	int type, cloexec, nonblock, fflag, error, sv[2];
424 
425 	type  = SCARG(uap, type) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
426 	cloexec = (SCARG(uap, type) & SOCK_CLOEXEC) ? UF_EXCLOSE : 0;
427 	nonblock = SCARG(uap, type) & SOCK_NONBLOCK;
428 	fflag = FREAD | FWRITE | (nonblock ? FNONBLOCK : 0);
429 
430 	error = socreate(SCARG(uap, domain), &so1, type, SCARG(uap, protocol));
431 	if (error)
432 		return (error);
433 	error = socreate(SCARG(uap, domain), &so2, type, SCARG(uap, protocol));
434 	if (error)
435 		goto free1;
436 
437 	error = soconnect2(so1, so2);
438 	if (error != 0)
439 		goto free2;
440 
441 	if ((SCARG(uap, type) & SOCK_TYPE_MASK) == SOCK_DGRAM) {
442 		/*
443 		 * Datagram socket connection is asymmetric.
444 		 */
445 		error = soconnect2(so2, so1);
446 		if (error != 0)
447 			goto free2;
448 	}
449 	fdplock(fdp);
450 	if ((error = falloc(p, &fp1, &sv[0])) != 0)
451 		goto free3;
452 	fp1->f_flag = fflag;
453 	fp1->f_type = DTYPE_SOCKET;
454 	fp1->f_ops = &socketops;
455 	fp1->f_data = so1;
456 	if ((error = falloc(p, &fp2, &sv[1])) != 0)
457 		goto free4;
458 	fp2->f_flag = fflag;
459 	fp2->f_type = DTYPE_SOCKET;
460 	fp2->f_ops = &socketops;
461 	fp2->f_data = so2;
462 	error = copyout(sv, SCARG(uap, rsv), 2 * sizeof (int));
463 	if (error == 0) {
464 		fdinsert(fdp, sv[0], cloexec, fp1);
465 		fdinsert(fdp, sv[1], cloexec, fp2);
466 		fdpunlock(fdp);
467 #ifdef KTRACE
468 		if (KTRPOINT(p, KTR_STRUCT))
469 			ktrfds(p, sv, 2);
470 #endif
471 		FRELE(fp1, p);
472 		FRELE(fp2, p);
473 		return (0);
474 	}
475 	fdremove(fdp, sv[1]);
476 free4:
477 	fdremove(fdp, sv[0]);
478 free3:
479 	fdpunlock(fdp);
480 
481 	if (fp2 != NULL) {
482 		closef(fp2, p);
483 		so2 = NULL;
484 	}
485 	if (fp1 != NULL) {
486 		closef(fp1, p);
487 		so1 = NULL;
488 	}
489 free2:
490 	if (so2 != NULL)
491 		(void)soclose(so2, 0);
492 free1:
493 	if (so1 != NULL)
494 		(void)soclose(so1, 0);
495 	return (error);
496 }
497 
498 int
499 sys_sendto(struct proc *p, void *v, register_t *retval)
500 {
501 	struct sys_sendto_args /* {
502 		syscallarg(int) s;
503 		syscallarg(const void *) buf;
504 		syscallarg(size_t) len;
505 		syscallarg(int) flags;
506 		syscallarg(const struct sockaddr *) to;
507 		syscallarg(socklen_t) tolen;
508 	} */ *uap = v;
509 	struct msghdr msg;
510 	struct iovec aiov;
511 
512 	msg.msg_name = (caddr_t)SCARG(uap, to);
513 	msg.msg_namelen = SCARG(uap, tolen);
514 	msg.msg_iov = &aiov;
515 	msg.msg_iovlen = 1;
516 	msg.msg_control = NULL;
517 	msg.msg_flags = 0;
518 	aiov.iov_base = (char *)SCARG(uap, buf);
519 	aiov.iov_len = SCARG(uap, len);
520 	return (sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval));
521 }
522 
523 int
524 sys_sendmsg(struct proc *p, void *v, register_t *retval)
525 {
526 	struct sys_sendmsg_args /* {
527 		syscallarg(int) s;
528 		syscallarg(const struct msghdr *) msg;
529 		syscallarg(int) flags;
530 	} */ *uap = v;
531 	struct msghdr msg;
532 	struct iovec aiov[UIO_SMALLIOV], *iov;
533 	int error;
534 
535 	error = copyin(SCARG(uap, msg), &msg, sizeof (msg));
536 	if (error)
537 		return (error);
538 #ifdef KTRACE
539 	if (KTRPOINT(p, KTR_STRUCT))
540 		ktrmsghdr(p, &msg);
541 #endif
542 
543 	if (msg.msg_iovlen > IOV_MAX)
544 		return (EMSGSIZE);
545 	if (msg.msg_iovlen > UIO_SMALLIOV)
546 		iov = mallocarray(msg.msg_iovlen, sizeof(struct iovec),
547 		    M_IOV, M_WAITOK);
548 	else
549 		iov = aiov;
550 	if (msg.msg_iovlen &&
551 	    (error = copyin(msg.msg_iov, iov,
552 		    msg.msg_iovlen * sizeof (struct iovec))))
553 		goto done;
554 #ifdef KTRACE
555 	if (msg.msg_iovlen && KTRPOINT(p, KTR_STRUCT))
556 		ktriovec(p, iov, msg.msg_iovlen);
557 #endif
558 	msg.msg_iov = iov;
559 	msg.msg_flags = 0;
560 	error = sendit(p, SCARG(uap, s), &msg, SCARG(uap, flags), retval);
561 done:
562 	if (iov != aiov)
563 		free(iov, M_IOV, sizeof(struct iovec) * msg.msg_iovlen);
564 	return (error);
565 }
566 
567 int
568 sendit(struct proc *p, int s, struct msghdr *mp, int flags, register_t *retsize)
569 {
570 	struct file *fp;
571 	struct uio auio;
572 	struct iovec *iov;
573 	int i;
574 	struct mbuf *to, *control;
575 	struct socket *so;
576 	size_t len;
577 	int error;
578 #ifdef KTRACE
579 	struct iovec *ktriov = NULL;
580 	int iovlen = 0;
581 #endif
582 
583 	to = NULL;
584 
585 	if ((error = getsock(p, s, &fp)) != 0)
586 		return (error);
587 	so = fp->f_data;
588 	if (fp->f_flag & FNONBLOCK)
589 		flags |= MSG_DONTWAIT;
590 
591 	error = pledge_sendit(p, mp->msg_name);
592 	if (error)
593 		goto bad;
594 
595 	auio.uio_iov = mp->msg_iov;
596 	auio.uio_iovcnt = mp->msg_iovlen;
597 	auio.uio_segflg = UIO_USERSPACE;
598 	auio.uio_rw = UIO_WRITE;
599 	auio.uio_procp = p;
600 	auio.uio_offset = 0;			/* XXX */
601 	auio.uio_resid = 0;
602 	iov = mp->msg_iov;
603 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
604 		/* Don't allow sum > SSIZE_MAX */
605 		if (iov->iov_len > SSIZE_MAX ||
606 		    (auio.uio_resid += iov->iov_len) > SSIZE_MAX) {
607 			error = EINVAL;
608 			goto bad;
609 		}
610 	}
611 	if (mp->msg_name) {
612 		error = sockargs(&to, mp->msg_name, mp->msg_namelen,
613 		    MT_SONAME);
614 		if (error)
615 			goto bad;
616 		if (isdnssocket(so)) {
617 			error = dns_portcheck(p, so, mtod(to, caddr_t),
618 			    mp->msg_namelen);
619 			if (error)
620 				goto bad;
621 		}
622 #ifdef KTRACE
623 		if (KTRPOINT(p, KTR_STRUCT))
624 			ktrsockaddr(p, mtod(to, caddr_t), mp->msg_namelen);
625 #endif
626 	}
627 	if (mp->msg_control) {
628 		if (mp->msg_controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) {
629 			error = EINVAL;
630 			goto bad;
631 		}
632 		error = sockargs(&control, mp->msg_control,
633 		    mp->msg_controllen, MT_CONTROL);
634 		if (error)
635 			goto bad;
636 #ifdef KTRACE
637 		if (KTRPOINT(p, KTR_STRUCT) && mp->msg_controllen)
638 			ktrcmsghdr(p, mtod(control, char *),
639 			    mp->msg_controllen);
640 #endif
641 	} else
642 		control = NULL;
643 #ifdef KTRACE
644 	if (KTRPOINT(p, KTR_GENIO)) {
645 		ktriov = mallocarray(auio.uio_iovcnt, sizeof(struct iovec),
646 		    M_TEMP, M_WAITOK);
647 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
648 
649 		memcpy(ktriov, auio.uio_iov, iovlen);
650 	}
651 #endif
652 	len = auio.uio_resid;
653 	error = sosend(so, to, &auio, NULL, control, flags);
654 	if (error) {
655 		if (auio.uio_resid != len && (error == ERESTART ||
656 		    error == EINTR || error == EWOULDBLOCK))
657 			error = 0;
658 		if (error == EPIPE && (flags & MSG_NOSIGNAL) == 0) {
659 			KERNEL_LOCK();
660 			ptsignal(p, SIGPIPE, STHREAD);
661 			KERNEL_UNLOCK();
662 		}
663 	}
664 	if (error == 0) {
665 		*retsize = len - auio.uio_resid;
666 		mtx_enter(&fp->f_mtx);
667 		fp->f_wxfer++;
668 		fp->f_wbytes += *retsize;
669 		mtx_leave(&fp->f_mtx);
670 	}
671 #ifdef KTRACE
672 	if (ktriov != NULL) {
673 		if (error == 0)
674 			ktrgenio(p, s, UIO_WRITE, ktriov, *retsize);
675 		free(ktriov, M_TEMP, iovlen);
676 	}
677 #endif
678 bad:
679 	FRELE(fp, p);
680 	m_freem(to);
681 	return (error);
682 }
683 
684 int
685 sys_recvfrom(struct proc *p, void *v, register_t *retval)
686 {
687 	struct sys_recvfrom_args /* {
688 		syscallarg(int) s;
689 		syscallarg(void *) buf;
690 		syscallarg(size_t) len;
691 		syscallarg(int) flags;
692 		syscallarg(struct sockaddr *) from;
693 		syscallarg(socklen_t *) fromlenaddr;
694 	} */ *uap = v;
695 	struct msghdr msg;
696 	struct iovec aiov;
697 	int error;
698 
699 	if (SCARG(uap, fromlenaddr)) {
700 		error = copyin(SCARG(uap, fromlenaddr),
701 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
702 		if (error)
703 			return (error);
704 	} else
705 		msg.msg_namelen = 0;
706 	msg.msg_name = (caddr_t)SCARG(uap, from);
707 	msg.msg_iov = &aiov;
708 	msg.msg_iovlen = 1;
709 	aiov.iov_base = SCARG(uap, buf);
710 	aiov.iov_len = SCARG(uap, len);
711 	msg.msg_control = NULL;
712 	msg.msg_flags = SCARG(uap, flags);
713 	return (recvit(p, SCARG(uap, s), &msg,
714 	    (caddr_t)SCARG(uap, fromlenaddr), retval));
715 }
716 
717 int
718 sys_recvmsg(struct proc *p, void *v, register_t *retval)
719 {
720 	struct sys_recvmsg_args /* {
721 		syscallarg(int) s;
722 		syscallarg(struct msghdr *) msg;
723 		syscallarg(int) flags;
724 	} */ *uap = v;
725 	struct msghdr msg;
726 	struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
727 	int error;
728 
729 	error = copyin(SCARG(uap, msg), &msg, sizeof (msg));
730 	if (error)
731 		return (error);
732 
733 	if (msg.msg_iovlen > IOV_MAX)
734 		return (EMSGSIZE);
735 	if (msg.msg_iovlen > UIO_SMALLIOV)
736 		iov = mallocarray(msg.msg_iovlen, sizeof(struct iovec),
737 		    M_IOV, M_WAITOK);
738 	else
739 		iov = aiov;
740 	msg.msg_flags = SCARG(uap, flags);
741 	if (msg.msg_iovlen > 0) {
742 		error = copyin(msg.msg_iov, iov,
743 		    msg.msg_iovlen * sizeof(struct iovec));
744 		if (error)
745 			goto done;
746 	}
747 	uiov = msg.msg_iov;
748 	msg.msg_iov = iov;
749 	if ((error = recvit(p, SCARG(uap, s), &msg, NULL, retval)) == 0) {
750 		msg.msg_iov = uiov;
751 #ifdef KTRACE
752 		if (KTRPOINT(p, KTR_STRUCT)) {
753 			ktrmsghdr(p, &msg);
754 			if (msg.msg_iovlen)
755 				ktriovec(p, iov, msg.msg_iovlen);
756 		}
757 #endif
758 		error = copyout(&msg, SCARG(uap, msg), sizeof(msg));
759 	}
760 done:
761 	if (iov != aiov)
762 		free(iov, M_IOV, sizeof(struct iovec) * msg.msg_iovlen);
763 	return (error);
764 }
765 
766 int
767 recvit(struct proc *p, int s, struct msghdr *mp, caddr_t namelenp,
768     register_t *retsize)
769 {
770 	struct file *fp;
771 	struct uio auio;
772 	struct iovec *iov;
773 	int i;
774 	size_t len;
775 	int error;
776 	struct mbuf *from = NULL, *control = NULL;
777 #ifdef KTRACE
778 	struct iovec *ktriov = NULL;
779 	int iovlen = 0, kmsgflags;
780 #endif
781 
782 	if ((error = getsock(p, s, &fp)) != 0)
783 		return (error);
784 
785 	auio.uio_iov = mp->msg_iov;
786 	auio.uio_iovcnt = mp->msg_iovlen;
787 	auio.uio_segflg = UIO_USERSPACE;
788 	auio.uio_rw = UIO_READ;
789 	auio.uio_procp = p;
790 	auio.uio_offset = 0;			/* XXX */
791 	auio.uio_resid = 0;
792 	iov = mp->msg_iov;
793 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
794 		/* Don't allow sum > SSIZE_MAX */
795 		if (iov->iov_len > SSIZE_MAX ||
796 		    (auio.uio_resid += iov->iov_len) > SSIZE_MAX) {
797 			error = EINVAL;
798 			goto out;
799 		}
800 	}
801 #ifdef KTRACE
802 	if (KTRPOINT(p, KTR_GENIO)) {
803 		ktriov = mallocarray(auio.uio_iovcnt, sizeof(struct iovec),
804 		    M_TEMP, M_WAITOK);
805 		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
806 
807 		memcpy(ktriov, auio.uio_iov, iovlen);
808 	}
809 	kmsgflags = mp->msg_flags;
810 #endif
811 	len = auio.uio_resid;
812 	if (fp->f_flag & FNONBLOCK)
813 		mp->msg_flags |= MSG_DONTWAIT;
814 	error = soreceive(fp->f_data, &from, &auio, NULL,
815 			  mp->msg_control ? &control : NULL,
816 			  &mp->msg_flags,
817 			  mp->msg_control ? mp->msg_controllen : 0);
818 	if (error) {
819 		if (auio.uio_resid != len && (error == ERESTART ||
820 		    error == EINTR || error == EWOULDBLOCK))
821 			error = 0;
822 	}
823 #ifdef KTRACE
824 	if (ktriov != NULL) {
825 		if (error == 0)
826 			ktrgenio(p, s, UIO_READ, ktriov, len - auio.uio_resid);
827 		free(ktriov, M_TEMP, iovlen);
828 	}
829 #endif
830 	if (error)
831 		goto out;
832 	*retsize = len - auio.uio_resid;
833 	if (mp->msg_name) {
834 		socklen_t alen;
835 
836 		if (from == NULL)
837 			alen = 0;
838 		else {
839 			alen = from->m_len;
840 			error = copyout(mtod(from, caddr_t), mp->msg_name,
841 			    MIN(alen, mp->msg_namelen));
842 			if (error)
843 				goto out;
844 #ifdef KTRACE
845 			if (KTRPOINT(p, KTR_STRUCT))
846 				ktrsockaddr(p, mtod(from, caddr_t), alen);
847 #endif
848 		}
849 		mp->msg_namelen = alen;
850 		if (namelenp &&
851 		    (error = copyout(&alen, namelenp, sizeof(alen)))) {
852 			goto out;
853 		}
854 	}
855 	if (mp->msg_control) {
856 		len = mp->msg_controllen;
857 		if (len <= 0 || control == NULL)
858 			len = 0;
859 		else {
860 			struct mbuf *m = control;
861 			caddr_t cp = mp->msg_control;
862 
863 			do {
864 				i = m->m_len;
865 				if (len < i) {
866 					mp->msg_flags |= MSG_CTRUNC;
867 					i = len;
868 				}
869 				error = copyout(mtod(m, caddr_t), cp, i);
870 #ifdef KTRACE
871 				if (KTRPOINT(p, KTR_STRUCT) && error == 0 && i) {
872 					/* msg_flags potentially incorrect */
873 					int rmsgflags = mp->msg_flags;
874 
875 					mp->msg_flags = kmsgflags;
876 					ktrcmsghdr(p, mtod(m, char *), i);
877 					mp->msg_flags = rmsgflags;
878 				}
879 #endif
880 				if (m->m_next)
881 					i = ALIGN(i);
882 				cp += i;
883 				len -= i;
884 				if (error != 0 || len <= 0)
885 					break;
886 			} while ((m = m->m_next) != NULL);
887 			len = cp - (caddr_t)mp->msg_control;
888 		}
889 		mp->msg_controllen = len;
890 	}
891 	if (!error) {
892 		mtx_enter(&fp->f_mtx);
893 		fp->f_rxfer++;
894 		fp->f_rbytes += *retsize;
895 		mtx_leave(&fp->f_mtx);
896 	}
897 out:
898 	FRELE(fp, p);
899 	m_freem(from);
900 	m_freem(control);
901 	return (error);
902 }
903 
904 int
905 sys_shutdown(struct proc *p, void *v, register_t *retval)
906 {
907 	struct sys_shutdown_args /* {
908 		syscallarg(int) s;
909 		syscallarg(int) how;
910 	} */ *uap = v;
911 	struct file *fp;
912 	int error;
913 
914 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
915 		return (error);
916 	error = soshutdown(fp->f_data, SCARG(uap, how));
917 	FRELE(fp, p);
918 	return (error);
919 }
920 
921 int
922 sys_setsockopt(struct proc *p, void *v, register_t *retval)
923 {
924 	struct sys_setsockopt_args /* {
925 		syscallarg(int) s;
926 		syscallarg(int) level;
927 		syscallarg(int) name;
928 		syscallarg(const void *) val;
929 		syscallarg(socklen_t) valsize;
930 	} */ *uap = v;
931 	struct file *fp;
932 	struct mbuf *m = NULL;
933 	struct socket *so;
934 	int error;
935 
936 
937 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
938 		return (error);
939 	error = pledge_sockopt(p, 1, SCARG(uap, level), SCARG(uap, name));
940 	if (error)
941 		goto bad;
942 	if (SCARG(uap, valsize) > MCLBYTES) {
943 		error = EINVAL;
944 		goto bad;
945 	}
946 	if (SCARG(uap, val)) {
947 		m = m_get(M_WAIT, MT_SOOPTS);
948 		if (SCARG(uap, valsize) > MLEN) {
949 			MCLGET(m, M_DONTWAIT);
950 			if ((m->m_flags & M_EXT) == 0) {
951 				error = ENOBUFS;
952 				goto bad;
953 			}
954 		}
955 		if (m == NULL) {
956 			error = ENOBUFS;
957 			goto bad;
958 		}
959 		error = copyin(SCARG(uap, val), mtod(m, caddr_t),
960 		    SCARG(uap, valsize));
961 		if (error) {
962 			goto bad;
963 		}
964 		m->m_len = SCARG(uap, valsize);
965 	}
966 	so = fp->f_data;
967 	solock(so);
968 	error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m);
969 	sounlock(so);
970 bad:
971 	m_freem(m);
972 	FRELE(fp, p);
973 	return (error);
974 }
975 
976 int
977 sys_getsockopt(struct proc *p, void *v, register_t *retval)
978 {
979 	struct sys_getsockopt_args /* {
980 		syscallarg(int) s;
981 		syscallarg(int) level;
982 		syscallarg(int) name;
983 		syscallarg(void *) val;
984 		syscallarg(socklen_t *) avalsize;
985 	} */ *uap = v;
986 	struct file *fp;
987 	struct mbuf *m = NULL;
988 	socklen_t valsize;
989 	struct socket *so;
990 	int error;
991 
992 	if ((error = getsock(p, SCARG(uap, s), &fp)) != 0)
993 		return (error);
994 	error = pledge_sockopt(p, 0, SCARG(uap, level), SCARG(uap, name));
995 	if (error)
996 		goto out;
997 	if (SCARG(uap, val)) {
998 		error = copyin(SCARG(uap, avalsize),
999 		    &valsize, sizeof (valsize));
1000 		if (error)
1001 			goto out;
1002 	} else
1003 		valsize = 0;
1004 	m = m_get(M_WAIT, MT_SOOPTS);
1005 	so = fp->f_data;
1006 	solock(so);
1007 	error = sogetopt(so, SCARG(uap, level), SCARG(uap, name), m);
1008 	sounlock(so);
1009 	if (error == 0 && SCARG(uap, val) && valsize && m != NULL) {
1010 		if (valsize > m->m_len)
1011 			valsize = m->m_len;
1012 		error = copyout(mtod(m, caddr_t), SCARG(uap, val), valsize);
1013 		if (error == 0)
1014 			error = copyout(&valsize,
1015 			    SCARG(uap, avalsize), sizeof (valsize));
1016 	}
1017 	m_free(m);
1018 out:
1019 	FRELE(fp, p);
1020 	return (error);
1021 }
1022 
1023 /*
1024  * Get socket name.
1025  */
1026 int
1027 sys_getsockname(struct proc *p, void *v, register_t *retval)
1028 {
1029 	struct sys_getsockname_args /* {
1030 		syscallarg(int) fdes;
1031 		syscallarg(struct sockaddr *) asa;
1032 		syscallarg(socklen_t *) alen;
1033 	} */ *uap = v;
1034 	struct file *fp;
1035 	struct socket *so;
1036 	struct mbuf *m = NULL;
1037 	socklen_t len;
1038 	int error;
1039 
1040 	if ((error = getsock(p, SCARG(uap, fdes), &fp)) != 0)
1041 		return (error);
1042 	error = copyin(SCARG(uap, alen), &len, sizeof (len));
1043 	if (error)
1044 		goto bad;
1045 	so = fp->f_data;
1046 	error = pledge_socket(p, -1, so->so_state);
1047 	if (error)
1048 		goto bad;
1049 	m = m_getclr(M_WAIT, MT_SONAME);
1050 	solock(so);
1051 	error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, NULL, m, NULL, p);
1052 	sounlock(so);
1053 	if (error)
1054 		goto bad;
1055 	error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
1056 bad:
1057 	FRELE(fp, p);
1058 	m_freem(m);
1059 	return (error);
1060 }
1061 
1062 /*
1063  * Get name of peer for connected socket.
1064  */
1065 int
1066 sys_getpeername(struct proc *p, void *v, register_t *retval)
1067 {
1068 	struct sys_getpeername_args /* {
1069 		syscallarg(int) fdes;
1070 		syscallarg(struct sockaddr *) asa;
1071 		syscallarg(socklen_t *) alen;
1072 	} */ *uap = v;
1073 	struct file *fp;
1074 	struct socket *so;
1075 	struct mbuf *m = NULL;
1076 	socklen_t len;
1077 	int error;
1078 
1079 	if ((error = getsock(p, SCARG(uap, fdes), &fp)) != 0)
1080 		return (error);
1081 	so = fp->f_data;
1082 	error = pledge_socket(p, -1, so->so_state);
1083 	if (error)
1084 		goto bad;
1085 	if ((so->so_state & SS_ISCONNECTED) == 0) {
1086 		error = ENOTCONN;
1087 		goto bad;
1088 	}
1089 	error = copyin(SCARG(uap, alen), &len, sizeof (len));
1090 	if (error)
1091 		goto bad;
1092 	m = m_getclr(M_WAIT, MT_SONAME);
1093 	solock(so);
1094 	error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, NULL, m, NULL, p);
1095 	sounlock(so);
1096 	if (error)
1097 		goto bad;
1098 	error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
1099 bad:
1100 	FRELE(fp, p);
1101 	m_freem(m);
1102 	return (error);
1103 }
1104 
1105 int
1106 sockargs(struct mbuf **mp, const void *buf, size_t buflen, int type)
1107 {
1108 	struct sockaddr *sa;
1109 	struct mbuf *m;
1110 	int error;
1111 
1112 	/*
1113 	 * We can't allow socket names > UCHAR_MAX in length, since that
1114 	 * will overflow sa_len. Also, control data more than MCLBYTES in
1115 	 * length is just too much.
1116 	 * Memory for sa_len and sa_family must exist.
1117 	 */
1118 	if ((buflen > (type == MT_SONAME ? UCHAR_MAX : MCLBYTES)) ||
1119 	    (type == MT_SONAME && buflen < offsetof(struct sockaddr, sa_data)))
1120 		return (EINVAL);
1121 
1122 	/* Allocate an mbuf to hold the arguments. */
1123 	m = m_get(M_WAIT, type);
1124 	if (buflen > MLEN) {
1125 		MCLGET(m, M_WAITOK);
1126 		if ((m->m_flags & M_EXT) == 0) {
1127 			m_free(m);
1128 			return ENOBUFS;
1129 		}
1130 	}
1131 	m->m_len = buflen;
1132 	error = copyin(buf, mtod(m, caddr_t), buflen);
1133 	if (error) {
1134 		(void) m_free(m);
1135 		return (error);
1136 	}
1137 	*mp = m;
1138 	if (type == MT_SONAME) {
1139 		sa = mtod(m, struct sockaddr *);
1140 		sa->sa_len = buflen;
1141 	}
1142 	return (0);
1143 }
1144 
1145 int
1146 getsock(struct proc *p, int fdes, struct file **fpp)
1147 {
1148 	struct file *fp;
1149 
1150 	fp = fd_getfile(p->p_fd, fdes);
1151 	if (fp == NULL)
1152 		return (EBADF);
1153 	if (fp->f_type != DTYPE_SOCKET) {
1154 		FRELE(fp, p);
1155 		return (ENOTSOCK);
1156 	}
1157 	*fpp = fp;
1158 
1159 	return (0);
1160 }
1161 
1162 int
1163 sys_setrtable(struct proc *p, void *v, register_t *retval)
1164 {
1165 	struct sys_setrtable_args /* {
1166 		syscallarg(int) rtableid;
1167 	} */ *uap = v;
1168 	u_int ps_rtableid = p->p_p->ps_rtableid;
1169 	int rtableid, error;
1170 
1171 	rtableid = SCARG(uap, rtableid);
1172 
1173 	if (ps_rtableid == rtableid)
1174 		return (0);
1175 	if (ps_rtableid != 0 && (error = suser(p)) != 0)
1176 		return (error);
1177 	if (rtableid < 0 || !rtable_exists((u_int)rtableid))
1178 		return (EINVAL);
1179 
1180 	p->p_p->ps_rtableid = (u_int)rtableid;
1181 	return (0);
1182 }
1183 
1184 int
1185 sys_getrtable(struct proc *p, void *v, register_t *retval)
1186 {
1187 	*retval = (int)p->p_p->ps_rtableid;
1188 	return (0);
1189 }
1190 
1191 int
1192 copyaddrout(struct proc *p, struct mbuf *name, struct sockaddr *sa,
1193     socklen_t buflen, socklen_t *outlen)
1194 {
1195 	int error;
1196 	socklen_t namelen = name->m_len;
1197 
1198 	/* SHOULD COPY OUT A CHAIN HERE */
1199 	error = copyout(mtod(name, caddr_t), sa, MIN(buflen, namelen));
1200 	if (error == 0) {
1201 #ifdef KTRACE
1202 		if (KTRPOINT(p, KTR_STRUCT))
1203 			ktrsockaddr(p, mtod(name, caddr_t), namelen);
1204 #endif
1205 		error = copyout(&namelen, outlen, sizeof(*outlen));
1206 	}
1207 
1208 	return (error);
1209 }
1210