1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.65.2.17 2003/04/04 17:11:16 tegge Exp $ 38 * $DragonFly: src/sys/kern/uipc_syscalls.c,v 1.92 2008/11/26 13:10:56 sephe Exp $ 39 */ 40 41 #include "opt_ktrace.h" 42 #include "opt_sctp.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/sysproto.h> 48 #include <sys/malloc.h> 49 #include <sys/filedesc.h> 50 #include <sys/event.h> 51 #include <sys/proc.h> 52 #include <sys/fcntl.h> 53 #include <sys/file.h> 54 #include <sys/filio.h> 55 #include <sys/kern_syscall.h> 56 #include <sys/mbuf.h> 57 #include <sys/protosw.h> 58 #include <sys/sfbuf.h> 59 #include <sys/socket.h> 60 #include <sys/socketvar.h> 61 #include <sys/socketops.h> 62 #include <sys/uio.h> 63 #include <sys/vnode.h> 64 #include <sys/lock.h> 65 #include <sys/mount.h> 66 #ifdef KTRACE 67 #include <sys/ktrace.h> 68 #endif 69 #include <vm/vm.h> 70 #include <vm/vm_object.h> 71 #include <vm/vm_page.h> 72 #include <vm/vm_pageout.h> 73 #include <vm/vm_kern.h> 74 #include <vm/vm_extern.h> 75 #include <sys/file2.h> 76 #include <sys/signalvar.h> 77 #include <sys/serialize.h> 78 79 #include <sys/thread2.h> 80 #include <sys/msgport2.h> 81 #include <sys/socketvar2.h> 82 #include <sys/mplock2.h> 83 #include <net/netmsg2.h> 84 85 #ifdef SCTP 86 #include <netinet/sctp_peeloff.h> 87 #endif /* SCTP */ 88 89 /* 90 * System call interface to the socket abstraction. 
 */

extern struct fileops socketops;

/*
 * socket_args(int domain, int type, int protocol)
 *
 * In-kernel guts of socket(2).  Reserves a file descriptor, creates the
 * socket, and associates the two.  On success *res receives the new
 * descriptor; on failure the reserved descriptor slot is released.
 */
int
kern_socket(int domain, int type, int protocol, int *res)
{
	struct thread *td = curthread;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct socket *so;
	struct file *fp;
	int fd, error;

	KKASSERT(td->td_lwp);

	/* Reserve the descriptor first; fp comes back held. */
	error = falloc(td->td_lwp, &fp, &fd);
	if (error)
		return (error);
	error = socreate(domain, &so, type, protocol, td);
	if (error) {
		/* Socket creation failed, release the reserved slot. */
		fsetfd(fdp, NULL, fd);
	} else {
		fp->f_type = DTYPE_SOCKET;
		fp->f_flag = FREAD | FWRITE;
		fp->f_ops = &socketops;
		fp->f_data = so;
		*res = fd;
		fsetfd(fdp, fp, fd);
	}
	/* Drop falloc()'s hold; on success the installed fd keeps fp alive. */
	fdrop(fp);
	return (error);
}

/*
 * socket(2) syscall entry point.
 *
 * MPALMOSTSAFE
 */
int
sys_socket(struct socket_args *uap)
{
	int error;

	get_mplock();
	error = kern_socket(uap->domain, uap->type, uap->protocol,
			    &uap->sysmsg_iresult);
	rel_mplock();

	return (error);
}

/*
 * In-kernel guts of bind(2): look up the socket behind descriptor s
 * (held across the call) and bind it to address sa.  The caller owns
 * and frees sa.
 */
int
kern_bind(int s, struct sockaddr *sa)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	KKASSERT(p);
	error = holdsock(p->p_fd, s, &fp);
	if (error)
		return (error);
	error = sobind((struct socket *)fp->f_data, sa, td);
	fdrop(fp);
	return (error);
}

/*
 * bind_args(int s, caddr_t name, int namelen)
 *
 * Copies the user address in, binds, then frees the kernel copy.
 *
 * MPALMOSTSAFE
 */
int
sys_bind(struct bind_args *uap)
{
	struct sockaddr *sa;
	int error;

	error = getsockaddr(&sa, uap->name, uap->namelen);
	if (error)
		return (error);
	get_mplock();
	error = kern_bind(uap->s, sa);
	rel_mplock();
	FREE(sa, M_SONAME);

	return (error);
}

/*
 * In-kernel guts of listen(2): put the socket behind descriptor s into
 * listening state with the given backlog.
 */
int
kern_listen(int s, int backlog)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	KKASSERT(p);
	error = holdsock(p->p_fd, s, &fp);
	if (error)
		return (error);
	error = solisten((struct socket *)fp->f_data, backlog, td);
	fdrop(fp);
	return(error);
}

/*
 * listen_args(int s, int backlog)
 *
 * MPALMOSTSAFE
 */
int
sys_listen(struct listen_args *uap)
{
	int error;

	get_mplock();
	error = kern_listen(uap->s, uap->backlog);
	rel_mplock();
	return (error);
}

/*
 * Accept-completion predicate, evaluated on the listen socket's protocol
 * thread via the netmsg notify mechanism.  Returns TRUE when the wait in
 * kern_accept() can finish: a pending error, an accepted connection
 * (returned via nm_so), a shut-down listen socket, or a non-blocking
 * request that would otherwise sleep.  Returns the accepted socket as
 * well.
 */
static boolean_t
soaccept_predicate(struct netmsg *msg0)
{
	struct netmsg_so_notify *msg = (struct netmsg_so_notify *)msg0;
	struct socket *head = msg->nm_so;

	if (head->so_error != 0) {
		msg->nm_netmsg.nm_lmsg.ms_error = head->so_error;
		return (TRUE);
	}
	if (!TAILQ_EMPTY(&head->so_comp)) {
		/* Abuse nm_so field as copy in/copy out parameter. XXX JH */
		msg->nm_so = TAILQ_FIRST(&head->so_comp);
		TAILQ_REMOVE(&head->so_comp, msg->nm_so, so_list);
		head->so_qlen--;

		msg->nm_netmsg.nm_lmsg.ms_error = 0;
		return (TRUE);
	}
	if (head->so_state & SS_CANTRCVMORE) {
		msg->nm_netmsg.nm_lmsg.ms_error = ECONNABORTED;
		return (TRUE);
	}
	if (msg->nm_fflags & FNONBLOCK) {
		msg->nm_netmsg.nm_lmsg.ms_error = EWOULDBLOCK;
		return (TRUE);
	}

	return (FALSE);
}

/*
 * The second argument to kern_accept() is a handle to a struct sockaddr.
 * This allows kern_accept() to return a pointer to an allocated struct
 * sockaddr which must be freed later with FREE().  The caller must
 * initialize *name to NULL.
 */
int
kern_accept(int s, int fflags, struct sockaddr **name, int *namelen, int *res)
{
	struct thread *td = curthread;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file *lfp = NULL;	/* listen socket's file */
	struct file *nfp = NULL;	/* new (accepted) socket's file */
	struct sockaddr *sa;
	struct socket *head, *so;
	struct netmsg_so_notify msg;
	int fd;
	u_int fflag;		/* type must match fp->f_flag */
	int error, tmp;

	*res = -1;
	if (name && namelen && *namelen < 0)
		return (EINVAL);

	error = holdsock(td->td_proc->p_fd, s, &lfp);
	if (error)
		return (error);

	/* Reserve the new descriptor before blocking for a connection. */
	error = falloc(td->td_lwp, &nfp, &fd);
	if (error) {		/* Probably ran out of file descriptors. */
		fdrop(lfp);
		return (error);
	}
	head = (struct socket *)lfp->f_data;
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Resolve the effective blocking mode: explicit extaccept flags
	 * override, otherwise inherit the listen descriptor's flags.
	 */
	if (fflags & O_FBLOCKING)
		fflags |= lfp->f_flag & ~FNONBLOCK;
	else if (fflags & O_FNONBLOCKING)
		fflags |= lfp->f_flag | FNONBLOCK;
	else
		fflags = lfp->f_flag;

	/*
	 * Wait (or poll, if FNONBLOCK) for a completed connection on the
	 * listen socket's protocol thread; soaccept_predicate() dequeues
	 * the connection and hands it back via msg.nm_so.
	 */
	/* optimize for uniprocessor case later XXX JH */
	netmsg_init_abortable(&msg.nm_netmsg, head, &curthread->td_msgport,
			      0, netmsg_so_notify, netmsg_so_notify_doabort);
	msg.nm_predicate = soaccept_predicate;
	msg.nm_fflags = fflags;
	msg.nm_so = head;
	msg.nm_etype = NM_REVENT;
	error = lwkt_domsg(head->so_port, &msg.nm_netmsg.nm_lmsg, PCATCH);
	if (error)
		goto done;

	/*
	 * At this point we have the connection that's ready to be accepted.
	 */
	so = msg.nm_so;

	fflag = lfp->f_flag;

	/* connection has been removed from the listen queue */
	KNOTE(&head->so_rcv.ssb_kq.ki_note, 0);

	so->so_state &= ~SS_COMP;
	so->so_head = NULL;
	/* Inherit async I/O ownership (SIGIO target) from the listen socket. */
	if (head->so_sigio != NULL)
		fsetown(fgetown(head->so_sigio), &so->so_sigio);

	nfp->f_type = DTYPE_SOCKET;
	nfp->f_flag = fflag;
	nfp->f_ops = &socketops;
	nfp->f_data = so;
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	fo_ioctl(nfp, FIONBIO, (caddr_t)&tmp, td->td_ucred, NULL);
	tmp = fflag & FASYNC;
	fo_ioctl(nfp, FIOASYNC, (caddr_t)&tmp, td->td_ucred, NULL);

	sa = NULL;
	error = soaccept(so, &sa);

	/*
	 * Set the returned name and namelen as applicable.  Set the returned
	 * namelen to 0 for older code which might ignore the return value
	 * from accept.
	 */
	if (error == 0) {
		if (sa && name && namelen) {
			if (*namelen > sa->sa_len)
				*namelen = sa->sa_len;
			*name = sa;
		} else {
			if (sa)
				FREE(sa, M_SONAME);
		}
	}

done:
	/*
	 * If an error occurred clear the reserved descriptor, else associate
	 * nfp with it.
	 *
	 * Note that *res is normally ignored if an error is returned but
	 * a syscall message will still have access to the result code.
	 */
	if (error) {
		fsetfd(fdp, NULL, fd);
	} else {
		*res = fd;
		fsetfd(fdp, nfp, fd);
	}
	fdrop(nfp);
	fdrop(lfp);
	return (error);
}

/*
 * accept(int s, caddr_t name, int *anamelen)
 *
 * Syscall wrapper: copies the caller's namelen in, accepts, then copies
 * the peer address and its (possibly truncated) length back out.
 *
 * MPALMOSTSAFE
 */
int
sys_accept(struct accept_args *uap)
{
	struct sockaddr *sa = NULL;
	int sa_len;
	int error;

	if (uap->name) {
		error = copyin(uap->anamelen, &sa_len, sizeof(sa_len));
		if (error)
			return (error);

		get_mplock();
		error = kern_accept(uap->s, 0, &sa, &sa_len,
				    &uap->sysmsg_iresult);
		rel_mplock();

		if (error == 0)
			error = copyout(sa, uap->name, sa_len);
		if (error == 0) {
			error = copyout(&sa_len, uap->anamelen,
					sizeof(*uap->anamelen));
		}
		if (sa)
			FREE(sa, M_SONAME);
	} else {
		/* Caller does not want the peer address. */
		get_mplock();
		error = kern_accept(uap->s, 0, NULL, 0,
				    &uap->sysmsg_iresult);
		rel_mplock();
	}
	return (error);
}

/*
 * extaccept(int s, int fflags, caddr_t name, int *anamelen)
 *
 * Extended accept: like accept(2) but with explicit per-call blocking
 * flags (O_FBLOCKING/O_FNONBLOCKING, masked by O_FMASK).
 *
 * MPALMOSTSAFE
 */
int
sys_extaccept(struct extaccept_args *uap)
{
	struct sockaddr *sa = NULL;
	int sa_len;
	int error;
	int fflags = uap->flags & O_FMASK;

	if (uap->name) {
		error = copyin(uap->anamelen, &sa_len, sizeof(sa_len));
		if (error)
			return (error);

		get_mplock();
		error = kern_accept(uap->s, fflags, &sa, &sa_len,
				    &uap->sysmsg_iresult);
		rel_mplock();

		if (error == 0)
			error = copyout(sa, uap->name, sa_len);
		if (error == 0) {
			error = copyout(&sa_len, uap->anamelen,
					sizeof(*uap->anamelen));
		}
		if (sa)
			FREE(sa, M_SONAME);
	} else {
		get_mplock();
		error = kern_accept(uap->s, fflags, NULL, 0,
				    &uap->sysmsg_iresult);
		rel_mplock();
	}
	return (error);
}


/*
 * Returns TRUE if predicate satisfied.
 */
static boolean_t
soconnected_predicate(struct netmsg *msg0)
{
	struct netmsg_so_notify *msg = (struct netmsg_so_notify *)msg0;
	struct socket *so = msg->nm_so;

	/* check predicate: done once no longer connecting, or on error */
	if (!(so->so_state & SS_ISCONNECTING) || so->so_error != 0) {
		msg->nm_netmsg.nm_lmsg.ms_error = so->so_error;
		return (TRUE);
	}

	return (FALSE);
}

/*
 * In-kernel guts of connect(2).  Initiates the connection and, unless
 * non-blocking, waits on the socket's protocol port until the connect
 * completes or fails.  The caller owns and frees sa.
 */
int
kern_connect(int s, int fflags, struct sockaddr *sa)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct socket *so;
	int error, interrupted = 0;

	error = holdsock(p->p_fd, s, &fp);
	if (error)
		return (error);
	so = (struct socket *)fp->f_data;

	/* Resolve effective blocking mode (see sys_extconnect). */
	if (fflags & O_FBLOCKING)
		/* fflags &= ~FNONBLOCK; */;
	else if (fflags & O_FNONBLOCKING)
		fflags |= FNONBLOCK;
	else
		fflags = fp->f_flag;

	if (so->so_state & SS_ISCONNECTING) {
		error = EALREADY;
		goto done;
	}
	error = soconnect(so, sa, td);
	if (error)
		goto bad;
	if ((fflags & FNONBLOCK) && (so->so_state & SS_ISCONNECTING)) {
		error = EINPROGRESS;
		goto done;
	}
	if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		struct netmsg_so_notify msg;

		netmsg_init_abortable(&msg.nm_netmsg, so,
				      &curthread->td_msgport,
				      0,
				      netmsg_so_notify,
				      netmsg_so_notify_doabort);
		msg.nm_predicate = soconnected_predicate;
		msg.nm_so = so;
		msg.nm_etype = NM_REVENT;
		error = lwkt_domsg(so->so_port, &msg.nm_netmsg.nm_lmsg, PCATCH);
		if (error == EINTR || error == ERESTART)
			interrupted = 1;
	}
	if (error == 0) {
		error = so->so_error;
		so->so_error = 0;
	}
bad:
	/*
	 * If the wait was interrupted leave SS_ISCONNECTING set so the
	 * connection attempt can still complete in the background.
	 */
	if (!interrupted)
		so->so_state &= ~SS_ISCONNECTING;
	if (error == ERESTART)
		error = EINTR;
done:
	fdrop(fp);
	return (error);
}

/*
 * connect_args(int s, caddr_t name, int namelen)
 *
 * MPALMOSTSAFE
 */
int
sys_connect(struct connect_args *uap)
{
	struct sockaddr *sa;
	int error;

	error = getsockaddr(&sa, uap->name, uap->namelen);
	if (error)
		return (error);
	get_mplock();
	error = kern_connect(uap->s, 0, sa);
	rel_mplock();
	FREE(sa, M_SONAME);

	return (error);
}

/*
 * extconnect_args(int s, int fflags, caddr_t name, int namelen)
 *
 * Extended connect with explicit per-call blocking flags.
 *
 * MPALMOSTSAFE
 */
int
sys_extconnect(struct extconnect_args *uap)
{
	struct sockaddr *sa;
	int error;
	int fflags = uap->flags & O_FMASK;

	error = getsockaddr(&sa, uap->name, uap->namelen);
	if (error)
		return (error);
	get_mplock();
	error = kern_connect(uap->s, fflags, sa);
	rel_mplock();
	FREE(sa, M_SONAME);

	return (error);
}

/*
 * In-kernel guts of socketpair(2): create two sockets, connect them to
 * each other, and install both descriptors.  sv[0]/sv[1] receive the
 * descriptors.  On failure all partially-created state is unwound via
 * the goto chain below (descriptor slots, file holds, then sockets).
 */
int
kern_socketpair(int domain, int type, int protocol, int *sv)
{
	struct thread *td = curthread;
	struct filedesc *fdp;
	struct file *fp1, *fp2;
	struct socket *so1, *so2;
	int fd1, fd2, error;

	fdp = td->td_proc->p_fd;
	error = socreate(domain, &so1, type, protocol, td);
	if (error)
		return (error);
	error = socreate(domain, &so2, type, protocol, td);
	if (error)
		goto free1;
	error = falloc(td->td_lwp, &fp1, &fd1);
	if (error)
		goto free2;
	sv[0] = fd1;
	fp1->f_data = so1;
	error = falloc(td->td_lwp, &fp2, &fd2);
	if (error)
		goto free3;
	fp2->f_data = so2;
	sv[1] = fd2;
	error = soconnect2(so1, so2);
	if (error)
		goto free4;
	if (type == SOCK_DGRAM) {
		/*
		 * Datagram socket connection is asymmetric.
		 */
		error = soconnect2(so2, so1);
		if (error)
			goto free4;
	}
	fp1->f_type = fp2->f_type = DTYPE_SOCKET;
	fp1->f_flag = fp2->f_flag = FREAD|FWRITE;
	fp1->f_ops = fp2->f_ops = &socketops;
	fsetfd(fdp, fp1, fd1);
	fsetfd(fdp, fp2, fd2);
	fdrop(fp1);
	fdrop(fp2);
	return (error);
free4:
	fsetfd(fdp, NULL, fd2);
	fdrop(fp2);
free3:
	fsetfd(fdp, NULL, fd1);
	fdrop(fp1);
free2:
	(void)soclose(so2, 0);
free1:
	(void)soclose(so1, 0);
	return (error);
}

/*
 * socketpair(int domain, int type, int protocol, int *rsv)
 *
 * MPALMOSTSAFE
 */
int
sys_socketpair(struct socketpair_args *uap)
{
	int error, sockv[2];

	get_mplock();
	error = kern_socketpair(uap->domain, uap->type, uap->protocol, sockv);
	rel_mplock();

	if (error == 0)
		error = copyout(sockv, uap->rsv, sizeof(sockv));
	return (error);
}

/*
 * Common send path for sendto(2)/sendmsg(2).  Sends the uio contents
 * (plus optional control mbuf and destination sa) on socket s.  On
 * success *res receives the number of bytes actually sent.  A partial
 * transfer interrupted by ERESTART/EINTR/EWOULDBLOCK is reported as
 * success; EPIPE additionally raises SIGPIPE on the sending lwp.
 * so_pru_sosend() consumes the control mbuf.
 */
int
kern_sendmsg(int s, struct sockaddr *sa, struct uio *auio,
	     struct mbuf *control, int flags, size_t *res)
{
	struct thread *td = curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p = td->td_proc;
	struct file *fp;
	size_t len;
	int error;
	struct socket *so;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
	struct uio ktruio;
#endif

	error = holdsock(p->p_fd, s, &fp);
	if (error)
		return (error);
#ifdef KTRACE
	/* Snapshot the iovec before the send consumes it. */
	if (KTRPOINT(td, KTR_GENIO)) {
		int iovlen = auio->uio_iovcnt * sizeof (struct iovec);

		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen);
		ktruio = *auio;
	}
#endif
	len = auio->uio_resid;
	so = (struct socket *)fp->f_data;
	/* Default blocking mode from the descriptor when not forced. */
	if ((flags & (MSG_FNONBLOCKING|MSG_FBLOCKING)) == 0) {
		if (fp->f_flag & FNONBLOCK)
			flags |= MSG_FNONBLOCKING;
	}
	error = so_pru_sosend(so, sa, auio, NULL, control, flags, td);
	if (error) {
		/* Interrupted partial writes still count as success. */
		if (auio->uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		if (error == EPIPE)
			lwpsignal(p, lp, SIGPIPE);
	}
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = len - auio->uio_resid;
			ktrgenio(lp, s, UIO_WRITE, &ktruio, error);
		}
		FREE(ktriov, M_TEMP);
	}
#endif
	if (error == 0)
		*res = len - auio->uio_resid;
	fdrop(fp);
	return (error);
}

/*
 * sendto_args(int s, caddr_t buf, size_t len, int flags, caddr_t to, int tolen)
 *
 * MPALMOSTSAFE
 */
int
sys_sendto(struct sendto_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	struct sockaddr *sa = NULL;
	int error;

	if (uap->to) {
		error = getsockaddr(&sa, uap->to, uap->tolen);
		if (error)
			return (error);
	}
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->len;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->len;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = td;

	get_mplock();
	error = kern_sendmsg(uap->s, sa, &auio, NULL, uap->flags,
			     &uap->sysmsg_szresult);
	rel_mplock();

	if (sa)
		FREE(sa, M_SONAME);
	return (error);
}

/*
 * sendmsg_args(int s, caddr_t msg, int flags)
 *
 * Copies the msghdr, destination address, iovec, and control data in
 * from userspace, then hands off to kern_sendmsg().
 *
 * MPALMOSTSAFE
 */
int
sys_sendmsg(struct sendmsg_args *uap)
{
	struct thread *td = curthread;
	struct msghdr msg;
	struct uio auio;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	struct sockaddr *sa = NULL;
	struct mbuf *control = NULL;
	int error;

	error = copyin(uap->msg, (caddr_t)&msg, sizeof(msg));
	if (error)
		return (error);

	/*
	 * Conditionally copyin msg.msg_name.
	 */
	if (msg.msg_name) {
		error = getsockaddr(&sa, msg.msg_name, msg.msg_namelen);
		if (error)
			return (error);
	}

	/*
	 * Populate auio.
	 */
	error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen,
			     &auio.uio_resid);
	if (error)
		goto cleanup2;
	auio.uio_iov = iov;
	auio.uio_iovcnt = msg.msg_iovlen;
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = td;

	/*
	 * Conditionally copyin msg.msg_control.  Control data must hold at
	 * least a cmsghdr and fit in a single mbuf.
	 */
	if (msg.msg_control) {
		if (msg.msg_controllen < sizeof(struct cmsghdr) ||
		    msg.msg_controllen > MLEN) {
			error = EINVAL;
			goto cleanup;
		}
		control = m_get(MB_WAIT, MT_CONTROL);
		if (control == NULL) {
			error = ENOBUFS;
			goto cleanup;
		}
		control->m_len = msg.msg_controllen;
		error = copyin(msg.msg_control, mtod(control, caddr_t),
			       msg.msg_controllen);
		if (error) {
			m_free(control);
			goto cleanup;
		}
	}

	get_mplock();
	/* kern_sendmsg() consumes control on our behalf. */
	error = kern_sendmsg(uap->s, sa, &auio, control, uap->flags,
			     &uap->sysmsg_szresult);
	rel_mplock();

cleanup:
	iovec_free(&iov, aiov);
cleanup2:
	if (sa)
		FREE(sa, M_SONAME);
	return (error);
}

/*
 * kern_recvmsg() takes a handle to sa and control.  If the handle is non-
 * null, it returns a dynamically allocated struct sockaddr and an mbuf.
 * Don't forget to FREE() and m_free() these if they are returned.
 */
int
kern_recvmsg(int s, struct sockaddr **sa, struct uio *auio,
	     struct mbuf **control, int *flags, size_t *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	size_t len;
	int error;
	int lflags;
	struct socket *so;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
	struct uio ktruio;
#endif

	error = holdsock(p->p_fd, s, &fp);
	if (error)
		return (error);
#ifdef KTRACE
	/* Snapshot the iovec before the receive consumes it. */
	if (KTRPOINT(td, KTR_GENIO)) {
		int iovlen = auio->uio_iovcnt * sizeof (struct iovec);

		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy(auio->uio_iov, ktriov, iovlen);
		ktruio = *auio;
	}
#endif
	len = auio->uio_resid;
	so = (struct socket *)fp->f_data;

	/*
	 * If the caller did not force a blocking mode, derive it from the
	 * descriptor.  When the caller passed no flags pointer at all, use
	 * the local lflags so soreceive still sees MSG_FNONBLOCKING.
	 */
	if (flags == NULL || (*flags & (MSG_FNONBLOCKING|MSG_FBLOCKING)) == 0) {
		if (fp->f_flag & FNONBLOCK) {
			if (flags) {
				*flags |= MSG_FNONBLOCKING;
			} else {
				lflags = MSG_FNONBLOCKING;
				flags = &lflags;
			}
		}
	}

	error = so_pru_soreceive(so, sa, auio, NULL, control, flags);
	if (error) {
		/* Interrupted partial reads still count as success. */
		if (auio->uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = len - auio->uio_resid;
			ktrgenio(td->td_lwp, s, UIO_READ, &ktruio, error);
		}
		FREE(ktriov, M_TEMP);
	}
#endif
	if (error == 0)
		*res = len - auio->uio_resid;
	fdrop(fp);
	return (error);
}

/*
 * recvfrom_args(int s, caddr_t buf, size_t len, int flags,
 *			caddr_t from, int *fromlenaddr)
 *
 * Syscall wrapper: receives into a single user buffer and optionally
 * copies the source address (truncated to the caller's fromlen) back out.
 *
 * MPALMOSTSAFE
 */
int
sys_recvfrom(struct recvfrom_args *uap)
{
	struct thread *td = curthread;
	struct uio auio;
	struct iovec aiov;
	struct sockaddr *sa = NULL;
	int error, fromlen;

	if (uap->from && uap->fromlenaddr) {
		error = copyin(uap->fromlenaddr, &fromlen, sizeof(fromlen));
		if (error)
			return (error);
		if (fromlen < 0)
			return (EINVAL);
	} else {
		fromlen = 0;
	}
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->len;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->len;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;

	get_mplock();
	error = kern_recvmsg(uap->s, uap->from ? &sa : NULL, &auio, NULL,
			     &uap->flags, &uap->sysmsg_szresult);
	rel_mplock();

	if (error == 0 && uap->from) {
		/* note: sa may still be NULL */
		if (sa) {
			fromlen = MIN(fromlen, sa->sa_len);
			error = copyout(sa, uap->from, fromlen);
		} else {
			fromlen = 0;
		}
		if (error == 0) {
			error = copyout(&fromlen, uap->fromlenaddr,
					sizeof(fromlen));
		}
	}
	if (sa)
		FREE(sa, M_SONAME);

	return (error);
}

/*
 * recvmsg_args(int s, struct msghdr *msg, int flags)
 *
 * Full recvmsg(2): receives scatter/gather data plus optional source
 * address and control data, then writes the updated namelen,
 * controllen, and flags fields back into the user's msghdr in place.
 *
 * MPALMOSTSAFE
 */
int
sys_recvmsg(struct recvmsg_args *uap)
{
	struct thread *td = curthread;
	struct msghdr msg;
	struct uio auio;
	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
	struct mbuf *m, *control = NULL;
	struct sockaddr *sa = NULL;
	caddr_t ctlbuf;
	socklen_t *ufromlenp, *ucontrollenp;
	int error, fromlen, controllen, len, flags, *uflagsp;

	/*
	 * This copyin handles everything except the iovec.
	 */
	error = copyin(uap->msg, &msg, sizeof(msg));
	if (error)
		return (error);

	if (msg.msg_name && msg.msg_namelen < 0)
		return (EINVAL);
	if (msg.msg_control && msg.msg_controllen < 0)
		return (EINVAL);

	/* User-space addresses of the msghdr fields we write back. */
	ufromlenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr,
		    msg_namelen));
	ucontrollenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr,
		       msg_controllen));
	uflagsp = (int *)((caddr_t)uap->msg + offsetof(struct msghdr,
		  msg_flags));

	/*
	 * Populate auio.
	 */
	error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen,
			     &auio.uio_resid);
	if (error)
		return (error);
	auio.uio_iov = iov;
	auio.uio_iovcnt = msg.msg_iovlen;
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;

	flags = uap->flags;

	get_mplock();
	error = kern_recvmsg(uap->s,
			     (msg.msg_name ? &sa : NULL), &auio,
			     (msg.msg_control ? &control : NULL), &flags,
			     &uap->sysmsg_szresult);
	rel_mplock();

	/*
	 * Conditionally copyout the name and populate the namelen field.
	 */
	if (error == 0 && msg.msg_name) {
		/* note: sa may still be NULL */
		if (sa != NULL) {
			fromlen = MIN(msg.msg_namelen, sa->sa_len);
			error = copyout(sa, msg.msg_name, fromlen);
		} else {
			fromlen = 0;
		}
		if (error == 0)
			error = copyout(&fromlen, ufromlenp,
					sizeof(*ufromlenp));
	}

	/*
	 * Copyout msg.msg_control and msg.msg_controllen.  Walk the control
	 * mbuf chain; if the user buffer is too small, copy what fits and
	 * flag the truncation with MSG_CTRUNC.
	 */
	if (error == 0 && msg.msg_control) {
		len = msg.msg_controllen;
		m = control;
		ctlbuf = (caddr_t)msg.msg_control;

		while(m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len) {
				tocopy = m->m_len;
			} else {
				msg.msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			error = copyout(mtod(m, caddr_t), ctlbuf, tocopy);
			if (error)
				goto cleanup;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		controllen = ctlbuf - (caddr_t)msg.msg_control;
		error = copyout(&controllen, ucontrollenp,
				sizeof(*ucontrollenp));
	}

	if (error == 0)
		error = copyout(&flags, uflagsp, sizeof(*uflagsp));

cleanup:
	if (sa)
		FREE(sa, M_SONAME);
	iovec_free(&iov, aiov);
	if (control)
		m_freem(control);
	return (error);
}

/*
 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an
 * in kernel pointer instead of a userland pointer.  This allows us
 * to manipulate socket options in the emulation code.
1076 */ 1077 int 1078 kern_setsockopt(int s, struct sockopt *sopt) 1079 { 1080 struct thread *td = curthread; 1081 struct proc *p = td->td_proc; 1082 struct file *fp; 1083 int error; 1084 1085 if (sopt->sopt_val == NULL && sopt->sopt_valsize != 0) 1086 return (EFAULT); 1087 if (sopt->sopt_val != NULL && sopt->sopt_valsize == 0) 1088 return (EINVAL); 1089 if (sopt->sopt_valsize < 0) 1090 return (EINVAL); 1091 1092 error = holdsock(p->p_fd, s, &fp); 1093 if (error) 1094 return (error); 1095 1096 error = sosetopt((struct socket *)fp->f_data, sopt); 1097 fdrop(fp); 1098 return (error); 1099 } 1100 1101 /* 1102 * setsockopt_args(int s, int level, int name, caddr_t val, int valsize) 1103 * 1104 * MPALMOSTSAFE 1105 */ 1106 int 1107 sys_setsockopt(struct setsockopt_args *uap) 1108 { 1109 struct thread *td = curthread; 1110 struct sockopt sopt; 1111 int error; 1112 1113 sopt.sopt_level = uap->level; 1114 sopt.sopt_name = uap->name; 1115 sopt.sopt_valsize = uap->valsize; 1116 sopt.sopt_td = td; 1117 sopt.sopt_val = NULL; 1118 1119 if (sopt.sopt_valsize < 0 || sopt.sopt_valsize > SOMAXOPT_SIZE) 1120 return (EINVAL); 1121 if (uap->val) { 1122 sopt.sopt_val = kmalloc(sopt.sopt_valsize, M_TEMP, M_WAITOK); 1123 error = copyin(uap->val, sopt.sopt_val, sopt.sopt_valsize); 1124 if (error) 1125 goto out; 1126 } 1127 1128 get_mplock(); 1129 error = kern_setsockopt(uap->s, &sopt); 1130 rel_mplock(); 1131 out: 1132 if (uap->val) 1133 kfree(sopt.sopt_val, M_TEMP); 1134 return(error); 1135 } 1136 1137 /* 1138 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an 1139 * in kernel pointer instead of a userland pointer. This allows us 1140 * to manipulate socket options in the emulation code. 
1141 */ 1142 int 1143 kern_getsockopt(int s, struct sockopt *sopt) 1144 { 1145 struct thread *td = curthread; 1146 struct proc *p = td->td_proc; 1147 struct file *fp; 1148 int error; 1149 1150 if (sopt->sopt_val == NULL && sopt->sopt_valsize != 0) 1151 return (EFAULT); 1152 if (sopt->sopt_val != NULL && sopt->sopt_valsize == 0) 1153 return (EINVAL); 1154 if (sopt->sopt_valsize < 0 || sopt->sopt_valsize > SOMAXOPT_SIZE) 1155 return (EINVAL); 1156 1157 error = holdsock(p->p_fd, s, &fp); 1158 if (error) 1159 return (error); 1160 1161 error = sogetopt((struct socket *)fp->f_data, sopt); 1162 fdrop(fp); 1163 return (error); 1164 } 1165 1166 /* 1167 * getsockopt_args(int s, int level, int name, caddr_t val, int *avalsize) 1168 * 1169 * MPALMOSTSAFE 1170 */ 1171 int 1172 sys_getsockopt(struct getsockopt_args *uap) 1173 { 1174 struct thread *td = curthread; 1175 struct sockopt sopt; 1176 int error, valsize; 1177 1178 if (uap->val) { 1179 error = copyin(uap->avalsize, &valsize, sizeof(valsize)); 1180 if (error) 1181 return (error); 1182 } else { 1183 valsize = 0; 1184 } 1185 1186 sopt.sopt_level = uap->level; 1187 sopt.sopt_name = uap->name; 1188 sopt.sopt_valsize = valsize; 1189 sopt.sopt_td = td; 1190 sopt.sopt_val = NULL; 1191 1192 if (sopt.sopt_valsize < 0 || sopt.sopt_valsize > SOMAXOPT_SIZE) 1193 return (EINVAL); 1194 if (uap->val) { 1195 sopt.sopt_val = kmalloc(sopt.sopt_valsize, M_TEMP, M_WAITOK); 1196 error = copyin(uap->val, sopt.sopt_val, sopt.sopt_valsize); 1197 if (error) 1198 goto out; 1199 } 1200 1201 get_mplock(); 1202 error = kern_getsockopt(uap->s, &sopt); 1203 rel_mplock(); 1204 if (error) 1205 goto out; 1206 valsize = sopt.sopt_valsize; 1207 error = copyout(&valsize, uap->avalsize, sizeof(valsize)); 1208 if (error) 1209 goto out; 1210 if (uap->val) 1211 error = copyout(sopt.sopt_val, uap->val, sopt.sopt_valsize); 1212 out: 1213 if (uap->val) 1214 kfree(sopt.sopt_val, M_TEMP); 1215 return (error); 1216 } 1217 1218 /* 1219 * The second argument to 
kern_getsockname() is a handle to a struct sockaddr. 1220 * This allows kern_getsockname() to return a pointer to an allocated struct 1221 * sockaddr which must be freed later with FREE(). The caller must 1222 * initialize *name to NULL. 1223 */ 1224 int 1225 kern_getsockname(int s, struct sockaddr **name, int *namelen) 1226 { 1227 struct thread *td = curthread; 1228 struct proc *p = td->td_proc; 1229 struct file *fp; 1230 struct socket *so; 1231 struct sockaddr *sa = NULL; 1232 int error; 1233 1234 error = holdsock(p->p_fd, s, &fp); 1235 if (error) 1236 return (error); 1237 if (*namelen < 0) { 1238 fdrop(fp); 1239 return (EINVAL); 1240 } 1241 so = (struct socket *)fp->f_data; 1242 error = so_pru_sockaddr(so, &sa); 1243 if (error == 0) { 1244 if (sa == NULL) { 1245 *namelen = 0; 1246 } else { 1247 *namelen = MIN(*namelen, sa->sa_len); 1248 *name = sa; 1249 } 1250 } 1251 1252 fdrop(fp); 1253 return (error); 1254 } 1255 1256 /* 1257 * getsockname_args(int fdes, caddr_t asa, int *alen) 1258 * 1259 * Get socket name. 1260 * 1261 * MPALMOSTSAFE 1262 */ 1263 int 1264 sys_getsockname(struct getsockname_args *uap) 1265 { 1266 struct sockaddr *sa = NULL; 1267 int error, sa_len; 1268 1269 error = copyin(uap->alen, &sa_len, sizeof(sa_len)); 1270 if (error) 1271 return (error); 1272 1273 get_mplock(); 1274 error = kern_getsockname(uap->fdes, &sa, &sa_len); 1275 rel_mplock(); 1276 1277 if (error == 0) 1278 error = copyout(sa, uap->asa, sa_len); 1279 if (error == 0) 1280 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen)); 1281 if (sa) 1282 FREE(sa, M_SONAME); 1283 return (error); 1284 } 1285 1286 /* 1287 * The second argument to kern_getpeername() is a handle to a struct sockaddr. 1288 * This allows kern_getpeername() to return a pointer to an allocated struct 1289 * sockaddr which must be freed later with FREE(). The caller must 1290 * initialize *name to NULL. 
 */
int
kern_getpeername(int s, struct sockaddr **name, int *namelen)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct socket *so;
	struct sockaddr *sa = NULL;
	int error;

	error = holdsock(p->p_fd, s, &fp);
	if (error)
		return (error);
	if (*namelen < 0) {
		fdrop(fp);
		return (EINVAL);
	}
	so = (struct socket *)fp->f_data;
	/* A peer address only exists on a connected (or confirming) socket. */
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
		fdrop(fp);
		return (ENOTCONN);
	}
	/* Protocol allocates *sa; ownership passes to our caller on success. */
	error = so_pru_peeraddr(so, &sa);
	if (error == 0) {
		if (sa == NULL) {
			*namelen = 0;
		} else {
			/* Clamp to the caller's buffer size. */
			*namelen = MIN(*namelen, sa->sa_len);
			*name = sa;
		}
	}

	fdrop(fp);
	return (error);
}

/*
 * getpeername_args(int fdes, caddr_t asa, int *alen)
 *
 * Get name of peer for connected socket.
 *
 * MPALMOSTSAFE
 */
int
sys_getpeername(struct getpeername_args *uap)
{
	struct sockaddr *sa = NULL;
	int error, sa_len;

	error = copyin(uap->alen, &sa_len, sizeof(sa_len));
	if (error)
		return (error);

	get_mplock();
	error = kern_getpeername(uap->fdes, &sa, &sa_len);
	rel_mplock();

	/* Copy the (clamped) address out, then the length actually used. */
	if (error == 0)
		error = copyout(sa, uap->asa, sa_len);
	if (error == 0)
		error = copyout(&sa_len, uap->alen, sizeof(*uap->alen));
	if (sa)
		FREE(sa, M_SONAME);
	return (error);
}

/*
 * Copy a user-supplied sockaddr of 'len' bytes into a freshly allocated
 * kernel sockaddr.  On success *namp owns the allocation (free with
 * FREE(.., M_SONAME)); on failure *namp is NULL and nothing is leaked.
 */
int
getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len)
{
	struct sockaddr *sa;
	int error;

	*namp = NULL;
	/* Reject lengths that cannot hold a sockaddr header or are absurd. */
	if (len > SOCK_MAXADDRLEN)
		return ENAMETOOLONG;
	if (len < offsetof(struct sockaddr, sa_data[0]))
		return EDOM;
	MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
	error = copyin(uaddr, sa, len);
	if (error) {
		FREE(sa, M_SONAME);
	} else {
#if BYTE_ORDER != BIG_ENDIAN
		/*
		 * The bind(), connect(), and sendto() syscalls were not
		 * versioned for COMPAT_43.  Thus, this check must stay.
		 */
		if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
			sa->sa_family = sa->sa_len;
#endif
		/* Kernel is authoritative for sa_len, not userland. */
		sa->sa_len = len;
		*namp = sa;
	}
	return error;
}

/*
 * Detach a mapped page and release resources back to the system.
 * We must release our wiring and if the object is ripped out
 * from under the vm_page we become responsible for freeing the
 * page.  These routines must be MPSAFE.
 *
 * XXX HACK XXX TEMPORARY UNTIL WE IMPLEMENT EXT MBUF REFERENCE COUNTING
 *
 * XXX vm_page_*() routines are not MPSAFE yet, the MP lock is required.
 */
static void
sf_buf_mfree(void *arg)
{
	struct sf_buf *sf = arg;
	vm_page_t m;

	/*
	 * XXX vm_page_*() and SFBUF routines not MPSAFE yet.
	 */
	get_mplock();
	crit_enter();
	m = sf_buf_page(sf);
	/* Last reference to the sf_buf: drop our page wiring too. */
	if (sf_buf_free(sf) == 0) {
		vm_page_unwire(m, 0);
		/* Orphaned page (object gone): we must free it ourselves. */
		if (m->wire_count == 0 && m->object == NULL)
			vm_page_try_to_free(m);
	}
	crit_exit();
	rel_mplock();
}

/*
 * sendfile(2).
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'.  Send only 'nbytes' of the file or until EOF if
 * nbytes == 0.  Optionally add a header and/or trailer to the socket
 * output.  If specified, write the total number of bytes sent into *sbytes.
 *
 * In FreeBSD kern/uipc_syscalls.c,v 1.103, a bug was fixed that caused
 * the headers to count against the remaining bytes to be sent from
 * the file descriptor.  We may wish to implement a compatibility syscall
 * in the future.
1433 * 1434 * MPALMOSTSAFE 1435 */ 1436 int 1437 sys_sendfile(struct sendfile_args *uap) 1438 { 1439 struct thread *td = curthread; 1440 struct proc *p = td->td_proc; 1441 struct file *fp; 1442 struct vnode *vp = NULL; 1443 struct sf_hdtr hdtr; 1444 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 1445 struct uio auio; 1446 struct mbuf *mheader = NULL; 1447 size_t hbytes = 0; 1448 size_t tbytes; 1449 off_t hdtr_size = 0; 1450 off_t sbytes; 1451 int error; 1452 1453 KKASSERT(p); 1454 1455 /* 1456 * Do argument checking. Must be a regular file in, stream 1457 * type and connected socket out, positive offset. 1458 */ 1459 fp = holdfp(p->p_fd, uap->fd, FREAD); 1460 if (fp == NULL) { 1461 return (EBADF); 1462 } 1463 if (fp->f_type != DTYPE_VNODE) { 1464 fdrop(fp); 1465 return (EINVAL); 1466 } 1467 get_mplock(); 1468 vp = (struct vnode *)fp->f_data; 1469 vref(vp); 1470 fdrop(fp); 1471 1472 /* 1473 * If specified, get the pointer to the sf_hdtr struct for 1474 * any headers/trailers. 1475 */ 1476 if (uap->hdtr) { 1477 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1478 if (error) 1479 goto done; 1480 /* 1481 * Send any headers. 1482 */ 1483 if (hdtr.headers) { 1484 error = iovec_copyin(hdtr.headers, &iov, aiov, 1485 hdtr.hdr_cnt, &hbytes); 1486 if (error) 1487 goto done; 1488 auio.uio_iov = iov; 1489 auio.uio_iovcnt = hdtr.hdr_cnt; 1490 auio.uio_offset = 0; 1491 auio.uio_segflg = UIO_USERSPACE; 1492 auio.uio_rw = UIO_WRITE; 1493 auio.uio_td = td; 1494 auio.uio_resid = hbytes; 1495 1496 mheader = m_uiomove(&auio); 1497 1498 iovec_free(&iov, aiov); 1499 if (mheader == NULL) 1500 goto done; 1501 } 1502 } 1503 1504 error = kern_sendfile(vp, uap->s, uap->offset, uap->nbytes, mheader, 1505 &sbytes, uap->flags); 1506 if (error) 1507 goto done; 1508 1509 /* 1510 * Send trailers. Wimp out and use writev(2). 
1511 */ 1512 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1513 error = iovec_copyin(hdtr.trailers, &iov, aiov, 1514 hdtr.trl_cnt, &auio.uio_resid); 1515 if (error) 1516 goto done; 1517 auio.uio_iov = iov; 1518 auio.uio_iovcnt = hdtr.trl_cnt; 1519 auio.uio_offset = 0; 1520 auio.uio_segflg = UIO_USERSPACE; 1521 auio.uio_rw = UIO_WRITE; 1522 auio.uio_td = td; 1523 1524 error = kern_sendmsg(uap->s, NULL, &auio, NULL, 0, &tbytes); 1525 1526 iovec_free(&iov, aiov); 1527 if (error) 1528 goto done; 1529 hdtr_size += tbytes; /* trailer bytes successfully sent */ 1530 } 1531 1532 done: 1533 if (vp) 1534 vrele(vp); 1535 rel_mplock(); 1536 if (uap->sbytes != NULL) { 1537 sbytes += hdtr_size; 1538 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1539 } 1540 return (error); 1541 } 1542 1543 int 1544 kern_sendfile(struct vnode *vp, int sfd, off_t offset, size_t nbytes, 1545 struct mbuf *mheader, off_t *sbytes, int flags) 1546 { 1547 struct thread *td = curthread; 1548 struct proc *p = td->td_proc; 1549 struct vm_object *obj; 1550 struct socket *so; 1551 struct file *fp; 1552 struct mbuf *m; 1553 struct sf_buf *sf; 1554 struct vm_page *pg; 1555 off_t off, xfsize; 1556 off_t hbytes = 0; 1557 int error = 0; 1558 1559 if (vp->v_type != VREG) { 1560 error = EINVAL; 1561 goto done0; 1562 } 1563 if ((obj = vp->v_object) == NULL) { 1564 error = EINVAL; 1565 goto done0; 1566 } 1567 error = holdsock(p->p_fd, sfd, &fp); 1568 if (error) 1569 goto done0; 1570 so = (struct socket *)fp->f_data; 1571 if (so->so_type != SOCK_STREAM) { 1572 error = EINVAL; 1573 goto done; 1574 } 1575 if ((so->so_state & SS_ISCONNECTED) == 0) { 1576 error = ENOTCONN; 1577 goto done; 1578 } 1579 if (offset < 0) { 1580 error = EINVAL; 1581 goto done; 1582 } 1583 1584 *sbytes = 0; 1585 /* 1586 * Protect against multiple writers to the socket. 1587 */ 1588 ssb_lock(&so->so_snd, M_WAITOK); 1589 1590 /* 1591 * Loop through the pages in the file, starting with the requested 1592 * offset. 
Get a file page (do I/O if necessary), map the file page 1593 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1594 * it on the socket. 1595 */ 1596 for (off = offset; ; off += xfsize, *sbytes += xfsize + hbytes) { 1597 vm_pindex_t pindex; 1598 vm_offset_t pgoff; 1599 1600 pindex = OFF_TO_IDX(off); 1601 retry_lookup: 1602 /* 1603 * Calculate the amount to transfer. Not to exceed a page, 1604 * the EOF, or the passed in nbytes. 1605 */ 1606 xfsize = vp->v_filesize - off; 1607 if (xfsize > PAGE_SIZE) 1608 xfsize = PAGE_SIZE; 1609 pgoff = (vm_offset_t)(off & PAGE_MASK); 1610 if (PAGE_SIZE - pgoff < xfsize) 1611 xfsize = PAGE_SIZE - pgoff; 1612 if (nbytes && xfsize > (nbytes - *sbytes)) 1613 xfsize = nbytes - *sbytes; 1614 if (xfsize <= 0) 1615 break; 1616 /* 1617 * Optimize the non-blocking case by looking at the socket space 1618 * before going to the extra work of constituting the sf_buf. 1619 */ 1620 if ((fp->f_flag & FNONBLOCK) && ssb_space(&so->so_snd) <= 0) { 1621 if (so->so_state & SS_CANTSENDMORE) 1622 error = EPIPE; 1623 else 1624 error = EAGAIN; 1625 ssb_unlock(&so->so_snd); 1626 goto done; 1627 } 1628 /* 1629 * Attempt to look up the page. 1630 * 1631 * Allocate if not found, wait and loop if busy, then 1632 * wire the page. critical section protection is 1633 * required to maintain the object association (an 1634 * interrupt can free the page) through to the 1635 * vm_page_wire() call. 
1636 */ 1637 crit_enter(); 1638 pg = vm_page_lookup(obj, pindex); 1639 if (pg == NULL) { 1640 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1641 if (pg == NULL) { 1642 vm_wait(0); 1643 crit_exit(); 1644 goto retry_lookup; 1645 } 1646 vm_page_wakeup(pg); 1647 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1648 crit_exit(); 1649 goto retry_lookup; 1650 } 1651 vm_page_wire(pg); 1652 crit_exit(); 1653 1654 /* 1655 * If page is not valid for what we need, initiate I/O 1656 */ 1657 1658 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1659 struct uio auio; 1660 struct iovec aiov; 1661 int bsize; 1662 1663 /* 1664 * Ensure that our page is still around when the I/O 1665 * completes. 1666 */ 1667 vm_page_io_start(pg); 1668 1669 /* 1670 * Get the page from backing store. 1671 */ 1672 bsize = vp->v_mount->mnt_stat.f_iosize; 1673 auio.uio_iov = &aiov; 1674 auio.uio_iovcnt = 1; 1675 aiov.iov_base = 0; 1676 aiov.iov_len = MAXBSIZE; 1677 auio.uio_resid = MAXBSIZE; 1678 auio.uio_offset = trunc_page(off); 1679 auio.uio_segflg = UIO_NOCOPY; 1680 auio.uio_rw = UIO_READ; 1681 auio.uio_td = td; 1682 vn_lock(vp, LK_SHARED | LK_RETRY); 1683 error = VOP_READ(vp, &auio, 1684 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1685 td->td_ucred); 1686 vn_unlock(vp); 1687 vm_page_flag_clear(pg, PG_ZERO); 1688 vm_page_io_finish(pg); 1689 if (error) { 1690 crit_enter(); 1691 vm_page_unwire(pg, 0); 1692 vm_page_try_to_free(pg); 1693 crit_exit(); 1694 ssb_unlock(&so->so_snd); 1695 goto done; 1696 } 1697 } 1698 1699 1700 /* 1701 * Get a sendfile buf. We usually wait as long as necessary, 1702 * but this wait can be interrupted. 1703 */ 1704 if ((sf = sf_buf_alloc(pg)) == NULL) { 1705 crit_enter(); 1706 vm_page_unwire(pg, 0); 1707 vm_page_try_to_free(pg); 1708 crit_exit(); 1709 ssb_unlock(&so->so_snd); 1710 error = EINTR; 1711 goto done; 1712 } 1713 1714 /* 1715 * Get an mbuf header and set it up as having external storage. 
1716 */ 1717 MGETHDR(m, MB_WAIT, MT_DATA); 1718 if (m == NULL) { 1719 error = ENOBUFS; 1720 sf_buf_free(sf); 1721 ssb_unlock(&so->so_snd); 1722 goto done; 1723 } 1724 1725 m->m_ext.ext_free = sf_buf_mfree; 1726 m->m_ext.ext_ref = sf_buf_ref; 1727 m->m_ext.ext_arg = sf; 1728 m->m_ext.ext_buf = (void *)sf_buf_kva(sf); 1729 m->m_ext.ext_size = PAGE_SIZE; 1730 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 1731 m->m_flags |= M_EXT; 1732 m->m_pkthdr.len = m->m_len = xfsize; 1733 KKASSERT((m->m_flags & (M_EXT_CLUSTER)) == 0); 1734 1735 if (mheader != NULL) { 1736 hbytes = mheader->m_pkthdr.len; 1737 mheader->m_pkthdr.len += m->m_pkthdr.len; 1738 m_cat(mheader, m); 1739 m = mheader; 1740 mheader = NULL; 1741 } else 1742 hbytes = 0; 1743 1744 /* 1745 * Add the buffer to the socket buffer chain. 1746 */ 1747 crit_enter(); 1748 retry_space: 1749 /* 1750 * Make sure that the socket is still able to take more data. 1751 * CANTSENDMORE being true usually means that the connection 1752 * was closed. so_error is true when an error was sensed after 1753 * a previous send. 1754 * The state is checked after the page mapping and buffer 1755 * allocation above since those operations may block and make 1756 * any socket checks stale. From this point forward, nothing 1757 * blocks before the pru_send (or more accurately, any blocking 1758 * results in a loop back to here to re-check). 1759 */ 1760 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1761 if (so->so_state & SS_CANTSENDMORE) { 1762 error = EPIPE; 1763 } else { 1764 error = so->so_error; 1765 so->so_error = 0; 1766 } 1767 m_freem(m); 1768 ssb_unlock(&so->so_snd); 1769 crit_exit(); 1770 goto done; 1771 } 1772 /* 1773 * Wait for socket space to become available. We do this just 1774 * after checking the connection state above in order to avoid 1775 * a race condition with ssb_wait(). 
1776 */ 1777 if (ssb_space(&so->so_snd) < so->so_snd.ssb_lowat) { 1778 if (fp->f_flag & FNONBLOCK) { 1779 m_freem(m); 1780 ssb_unlock(&so->so_snd); 1781 crit_exit(); 1782 error = EAGAIN; 1783 goto done; 1784 } 1785 error = ssb_wait(&so->so_snd); 1786 /* 1787 * An error from ssb_wait usually indicates that we've 1788 * been interrupted by a signal. If we've sent anything 1789 * then return bytes sent, otherwise return the error. 1790 */ 1791 if (error) { 1792 m_freem(m); 1793 ssb_unlock(&so->so_snd); 1794 crit_exit(); 1795 goto done; 1796 } 1797 goto retry_space; 1798 } 1799 error = so_pru_send(so, 0, m, NULL, NULL, td); 1800 crit_exit(); 1801 if (error) { 1802 ssb_unlock(&so->so_snd); 1803 goto done; 1804 } 1805 } 1806 if (mheader != NULL) { 1807 *sbytes += mheader->m_pkthdr.len; 1808 error = so_pru_send(so, 0, mheader, NULL, NULL, td); 1809 mheader = NULL; 1810 } 1811 ssb_unlock(&so->so_snd); 1812 1813 done: 1814 fdrop(fp); 1815 done0: 1816 if (mheader != NULL) 1817 m_freem(mheader); 1818 return (error); 1819 } 1820 1821 /* 1822 * MPALMOSTSAFE 1823 */ 1824 int 1825 sys_sctp_peeloff(struct sctp_peeloff_args *uap) 1826 { 1827 #ifdef SCTP 1828 struct thread *td = curthread; 1829 struct filedesc *fdp = td->td_proc->p_fd; 1830 struct file *lfp = NULL; 1831 struct file *nfp = NULL; 1832 int error; 1833 struct socket *head, *so; 1834 caddr_t assoc_id; 1835 int fd; 1836 short fflag; /* type must match fp->f_flag */ 1837 1838 assoc_id = uap->name; 1839 error = holdsock(td->td_proc->p_fd, uap->sd, &lfp); 1840 if (error) 1841 return (error); 1842 1843 get_mplock(); 1844 crit_enter(); 1845 head = (struct socket *)lfp->f_data; 1846 error = sctp_can_peel_off(head, assoc_id); 1847 if (error) { 1848 crit_exit(); 1849 goto done; 1850 } 1851 /* 1852 * At this point we know we do have a assoc to pull 1853 * we proceed to get the fd setup. This may block 1854 * but that is ok. 
1855 */ 1856 1857 fflag = lfp->f_flag; 1858 error = falloc(td->td_lwp, &nfp, &fd); 1859 if (error) { 1860 /* 1861 * Probably ran out of file descriptors. Put the 1862 * unaccepted connection back onto the queue and 1863 * do another wakeup so some other process might 1864 * have a chance at it. 1865 */ 1866 crit_exit(); 1867 goto done; 1868 } 1869 uap->sysmsg_iresult = fd; 1870 1871 so = sctp_get_peeloff(head, assoc_id, &error); 1872 if (so == NULL) { 1873 /* 1874 * Either someone else peeled it off OR 1875 * we can't get a socket. 1876 */ 1877 goto noconnection; 1878 } 1879 so->so_state &= ~SS_COMP; 1880 so->so_state &= ~SS_NOFDREF; 1881 so->so_head = NULL; 1882 if (head->so_sigio != NULL) 1883 fsetown(fgetown(head->so_sigio), &so->so_sigio); 1884 1885 nfp->f_type = DTYPE_SOCKET; 1886 nfp->f_flag = fflag; 1887 nfp->f_ops = &socketops; 1888 nfp->f_data = so; 1889 1890 noconnection: 1891 /* 1892 * Assign the file pointer to the reserved descriptor, or clear 1893 * the reserved descriptor if an error occured. 1894 */ 1895 if (error) 1896 fsetfd(fdp, NULL, fd); 1897 else 1898 fsetfd(fdp, nfp, fd); 1899 crit_exit(); 1900 /* 1901 * Release explicitly held references before returning. 1902 */ 1903 done: 1904 rel_mplock(); 1905 if (nfp != NULL) 1906 fdrop(nfp); 1907 fdrop(lfp); 1908 return (error); 1909 #else /* SCTP */ 1910 return(EOPNOTSUPP); 1911 #endif /* SCTP */ 1912 } 1913