1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.65.2.17 2003/04/04 17:11:16 tegge Exp $ 38 * $DragonFly: src/sys/kern/uipc_syscalls.c,v 1.27 2004/03/14 14:35:23 joerg Exp $ 39 */ 40 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/sysproto.h> 47 #include <sys/malloc.h> 48 #include <sys/filedesc.h> 49 #include <sys/event.h> 50 #include <sys/proc.h> 51 #include <sys/fcntl.h> 52 #include <sys/file.h> 53 #include <sys/filio.h> 54 #include <sys/kern_syscall.h> 55 #include <sys/mbuf.h> 56 #include <sys/protosw.h> 57 #include <sys/sfbuf.h> 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/socketops.h> 61 #include <sys/uio.h> 62 #include <sys/vnode.h> 63 #include <sys/lock.h> 64 #include <sys/mount.h> 65 #ifdef KTRACE 66 #include <sys/ktrace.h> 67 #endif 68 #include <vm/vm.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_page.h> 71 #include <vm/vm_pageout.h> 72 #include <vm/vm_kern.h> 73 #include <vm/vm_extern.h> 74 #include <sys/file2.h> 75 #include <sys/signalvar.h> 76 77 /* 78 * System call interface to the socket abstraction. 79 */ 80 81 extern struct fileops socketops; 82 83 /* 84 * socket_args(int domain, int type, int protocol) 85 */ 86 int 87 kern_socket(int domain, int type, int protocol, int *res) 88 { 89 struct thread *td = curthread; 90 struct proc *p = td->td_proc; 91 struct filedesc *fdp; 92 struct socket *so; 93 struct file *fp; 94 int fd, error; 95 96 KKASSERT(p); 97 fdp = p->p_fd; 98 99 error = falloc(p, &fp, &fd); 100 if (error) 101 return (error); 102 fhold(fp); 103 error = socreate(domain, &so, type, protocol, td); 104 if (error) { 105 if (fdp->fd_ofiles[fd] == fp) { 106 fdp->fd_ofiles[fd] = NULL; 107 fdrop(fp, td); 108 } 109 } else { 110 fp->f_data = (caddr_t)so; 111 fp->f_flag = FREAD|FWRITE; 112 fp->f_ops = &socketops; 113 fp->f_type = DTYPE_SOCKET; 114 *res = fd; 115 } 116 fdrop(fp, td); 117 return (error); 118 } 119 120 int 121 socket(struct socket_args *uap) 122 { 123 int error; 124 125 error = kern_socket(uap->domain, uap->type, uap->protocol, 126 &uap->sysmsg_result); 127 128 return (error); 129 } 130 131 int 132 kern_bind(int s, struct sockaddr *sa) 133 { 134 struct thread *td = curthread; 135 struct proc *p = td->td_proc; 136 struct file *fp; 137 int error; 138 139 KKASSERT(p); 140 error = holdsock(p->p_fd, s, &fp); 141 if (error) 142 return (error); 143 error = sobind((struct socket *)fp->f_data, sa, td); 144 fdrop(fp, td); 145 return (error); 146 } 147 148 /* 149 * bind_args(int s, caddr_t name, int namelen) 150 */ 151 int 152 bind(struct bind_args *uap) 153 { 154 struct sockaddr *sa; 155 int error; 156 157 error = getsockaddr(&sa, uap->name, uap->namelen); 158 if (error) 159 return (error); 160 error = kern_bind(uap->s, sa); 161 FREE(sa, M_SONAME); 162 163 return (error); 164 } 165 166 int 167 kern_listen(int s, int backlog) 168 { 169 struct thread *td = curthread; 170 struct proc *p = td->td_proc; 171 struct file *fp; 172 int error; 173 174 KKASSERT(p); 175 error = holdsock(p->p_fd, s, &fp); 176 if (error) 177 return (error); 178 error = solisten((struct socket *)fp->f_data, backlog, td); 179 fdrop(fp, td); 180 return(error); 181 } 182 183 /* 184 * listen_args(int s, int backlog) 185 */ 186 int 187 listen(struct listen_args *uap) 188 { 189 int error; 190 191 error = kern_listen(uap->s, uap->backlog); 192 return (error); 193 } 194 195 /* 196 * The second argument to kern_accept() is a handle to a struct sockaddr. 197 * This allows kern_accept() to return a pointer to an allocated struct 198 * sockaddr which must be freed later with FREE(). The caller must 199 * initialize *name to NULL. 200 */ 201 int 202 kern_accept(int s, struct sockaddr **name, int *namelen, int *res) 203 { 204 struct thread *td = curthread; 205 struct proc *p = td->td_proc; 206 struct filedesc *fdp = p->p_fd; 207 struct file *lfp = NULL; 208 struct file *nfp = NULL; 209 struct sockaddr *sa; 210 int error, s1; 211 struct socket *head, *so; 212 int fd; 213 u_int fflag; /* type must match fp->f_flag */ 214 int tmp; 215 216 if (name && namelen && *namelen < 0) 217 return (EINVAL); 218 219 error = holdsock(fdp, s, &lfp); 220 if (error) 221 return (error); 222 s1 = splnet(); 223 head = (struct socket *)lfp->f_data; 224 if ((head->so_options & SO_ACCEPTCONN) == 0) { 225 splx(s1); 226 error = EINVAL; 227 goto done; 228 } 229 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 230 if (head->so_state & SS_CANTRCVMORE) { 231 head->so_error = ECONNABORTED; 232 break; 233 } 234 if ((head->so_state & SS_NBIO) != 0) { 235 head->so_error = EWOULDBLOCK; 236 break; 237 } 238 error = tsleep((caddr_t)&head->so_timeo, PCATCH, "accept", 0); 239 if (error) { 240 splx(s1); 241 goto done; 242 } 243 } 244 if (head->so_error) { 245 error = head->so_error; 246 head->so_error = 0; 247 splx(s1); 248 goto done; 249 } 250 251 /* 252 * At this point we know that there is at least one connection 253 * ready to be accepted. Remove it from the queue prior to 254 * allocating the file descriptor for it since falloc() may 255 * block allowing another process to accept the connection 256 * instead. 257 */ 258 so = TAILQ_FIRST(&head->so_comp); 259 TAILQ_REMOVE(&head->so_comp, so, so_list); 260 head->so_qlen--; 261 262 fflag = lfp->f_flag; 263 error = falloc(p, &nfp, &fd); 264 if (error) { 265 /* 266 * Probably ran out of file descriptors. Put the 267 * unaccepted connection back onto the queue and 268 * do another wakeup so some other process might 269 * have a chance at it. 270 */ 271 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 272 head->so_qlen++; 273 wakeup_one(&head->so_timeo); 274 splx(s1); 275 goto done; 276 } 277 fhold(nfp); 278 *res = fd; 279 280 /* connection has been removed from the listen queue */ 281 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 282 283 so->so_state &= ~SS_COMP; 284 so->so_head = NULL; 285 if (head->so_sigio != NULL) 286 fsetown(fgetown(head->so_sigio), &so->so_sigio); 287 288 nfp->f_data = (caddr_t)so; 289 nfp->f_flag = fflag; 290 nfp->f_ops = &socketops; 291 nfp->f_type = DTYPE_SOCKET; 292 /* Sync socket nonblocking/async state with file flags */ 293 tmp = fflag & FNONBLOCK; 294 (void) fo_ioctl(nfp, FIONBIO, (caddr_t)&tmp, td); 295 tmp = fflag & FASYNC; 296 (void) fo_ioctl(nfp, FIOASYNC, (caddr_t)&tmp, td); 297 298 sa = NULL; 299 error = soaccept(so, &sa); 300 301 /* 302 * Set the returned name and namelen as applicable. Set the returned 303 * namelen to 0 for older code which might ignore the return value 304 * from accept. 305 */ 306 if (error == 0) { 307 if (sa && name && namelen) { 308 if (*namelen > sa->sa_len) 309 *namelen = sa->sa_len; 310 *name = sa; 311 } else { 312 if (sa) 313 FREE(sa, M_SONAME); 314 } 315 } 316 317 /* 318 * close the new descriptor, assuming someone hasn't ripped it 319 * out from under us. Note that *res is normally ignored if an 320 * error is returned but a syscall message will still have access 321 * to the result code. 322 */ 323 if (error) { 324 *res = -1; 325 if (fdp->fd_ofiles[fd] == nfp) { 326 fdp->fd_ofiles[fd] = NULL; 327 fdrop(nfp, td); 328 } 329 } 330 splx(s1); 331 332 /* 333 * Release explicitly held references before returning. 334 */ 335 done: 336 if (nfp != NULL) 337 fdrop(nfp, td); 338 fdrop(lfp, td); 339 return (error); 340 } 341 342 /* 343 * accept_args(int s, caddr_t name, int *anamelen) 344 */ 345 int 346 accept(struct accept_args *uap) 347 { 348 struct sockaddr *sa = NULL; 349 int sa_len; 350 int error; 351 352 if (uap->name) { 353 error = copyin(uap->anamelen, &sa_len, sizeof(sa_len)); 354 if (error) 355 return (error); 356 357 error = kern_accept(uap->s, &sa, &sa_len, &uap->sysmsg_result); 358 359 if (error == 0) 360 error = copyout(sa, uap->name, sa_len); 361 if (error == 0) { 362 error = copyout(&sa_len, uap->anamelen, 363 sizeof(*uap->anamelen)); 364 } 365 if (sa) 366 FREE(sa, M_SONAME); 367 } else { 368 error = kern_accept(uap->s, NULL, 0, &uap->sysmsg_result); 369 } 370 return (error); 371 } 372 373 int 374 kern_connect(int s, struct sockaddr *sa) 375 { 376 struct thread *td = curthread; 377 struct proc *p = td->td_proc; 378 struct file *fp; 379 struct socket *so; 380 int error; 381 382 error = holdsock(p->p_fd, s, &fp); 383 if (error) 384 return (error); 385 so = (struct socket *)fp->f_data; 386 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 387 error = EALREADY; 388 goto done; 389 } 390 error = soconnect(so, sa, td); 391 if (error) 392 goto bad; 393 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 394 error = EINPROGRESS; 395 goto done; 396 } 397 s = splnet(); 398 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 399 error = tsleep((caddr_t)&so->so_timeo, PCATCH, "connec", 0); 400 if (error) 401 break; 402 } 403 if (error == 0) { 404 error = so->so_error; 405 so->so_error = 0; 406 } 407 splx(s); 408 bad: 409 so->so_state &= ~SS_ISCONNECTING; 410 if (error == ERESTART) 411 error = EINTR; 412 done: 413 fdrop(fp, td); 414 return (error); 415 } 416 417 /* 418 * connect_args(int s, caddr_t name, int namelen) 419 */ 420 int 421 connect(struct connect_args *uap) 422 { 423 struct sockaddr *sa; 424 int error; 425 426 error = getsockaddr(&sa, uap->name, uap->namelen); 427 if (error) 428 return (error); 429 error = kern_connect(uap->s, sa); 430 FREE(sa, M_SONAME); 431 432 return (error); 433 } 434 435 int 436 kern_socketpair(int domain, int type, int protocol, int *sv) 437 { 438 struct thread *td = curthread; 439 struct proc *p = td->td_proc; 440 struct filedesc *fdp; 441 struct file *fp1, *fp2; 442 struct socket *so1, *so2; 443 int fd, error; 444 445 KKASSERT(p); 446 fdp = p->p_fd; 447 error = socreate(domain, &so1, type, protocol, td); 448 if (error) 449 return (error); 450 error = socreate(domain, &so2, type, protocol, td); 451 if (error) 452 goto free1; 453 error = falloc(p, &fp1, &fd); 454 if (error) 455 goto free2; 456 fhold(fp1); 457 sv[0] = fd; 458 fp1->f_data = (caddr_t)so1; 459 error = falloc(p, &fp2, &fd); 460 if (error) 461 goto free3; 462 fhold(fp2); 463 fp2->f_data = (caddr_t)so2; 464 sv[1] = fd; 465 error = soconnect2(so1, so2); 466 if (error) 467 goto free4; 468 if (type == SOCK_DGRAM) { 469 /* 470 * Datagram socket connection is asymmetric. 471 */ 472 error = soconnect2(so2, so1); 473 if (error) 474 goto free4; 475 } 476 fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 477 fp1->f_ops = fp2->f_ops = &socketops; 478 fp1->f_type = fp2->f_type = DTYPE_SOCKET; 479 fdrop(fp1, td); 480 fdrop(fp2, td); 481 return (error); 482 free4: 483 if (fdp->fd_ofiles[sv[1]] == fp2) { 484 fdp->fd_ofiles[sv[1]] = NULL; 485 fdrop(fp2, td); 486 } 487 fdrop(fp2, td); 488 free3: 489 if (fdp->fd_ofiles[sv[0]] == fp1) { 490 fdp->fd_ofiles[sv[0]] = NULL; 491 fdrop(fp1, td); 492 } 493 fdrop(fp1, td); 494 free2: 495 (void)soclose(so2); 496 free1: 497 (void)soclose(so1); 498 return (error); 499 } 500 501 /* 502 * socketpair(int domain, int type, int protocol, int *rsv) 503 */ 504 int 505 socketpair(struct socketpair_args *uap) 506 { 507 int error, sockv[2]; 508 509 error = kern_socketpair(uap->domain, uap->type, uap->protocol, sockv); 510 511 if (error == 0) 512 error = copyout(sockv, uap->rsv, sizeof(sockv)); 513 return (error); 514 } 515 516 int 517 kern_sendmsg(int s, struct sockaddr *sa, struct uio *auio, 518 struct mbuf *control, int flags, int *res) 519 { 520 struct thread *td = curthread; 521 struct proc *p = td->td_proc; 522 struct file *fp; 523 int len, error; 524 struct socket *so; 525 #ifdef KTRACE 526 struct iovec *ktriov = NULL; 527 struct uio ktruio; 528 #endif 529 530 error = holdsock(p->p_fd, s, &fp); 531 if (error) 532 return (error); 533 if (auio->uio_resid < 0) { 534 error = EINVAL; 535 goto done; 536 } 537 #ifdef KTRACE 538 if (KTRPOINT(td, KTR_GENIO)) { 539 int iovlen = auio->uio_iovcnt * sizeof (struct iovec); 540 541 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 542 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen); 543 ktruio = *auio; 544 } 545 #endif 546 len = auio->uio_resid; 547 so = (struct socket *)fp->f_data; 548 error = so_pru_sosend(so, sa, auio, NULL, control, flags, td); 549 if (error) { 550 if (auio->uio_resid != len && (error == ERESTART || 551 error == EINTR || error == EWOULDBLOCK)) 552 error = 0; 553 if (error == EPIPE) 554 psignal(p, SIGPIPE); 555 } 556 #ifdef KTRACE 557 if (ktriov != NULL) { 558 if (error == 0) { 559 ktruio.uio_iov = ktriov; 560 ktruio.uio_resid = len - auio->uio_resid; 561 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error); 562 } 563 FREE(ktriov, M_TEMP); 564 } 565 #endif 566 if (error == 0) 567 *res = len - auio->uio_resid; 568 done: 569 fdrop(fp, td); 570 return (error); 571 } 572 573 /* 574 * sendto_args(int s, caddr_t buf, size_t len, int flags, caddr_t to, int tolen) 575 */ 576 int 577 sendto(struct sendto_args *uap) 578 { 579 struct thread *td = curthread; 580 struct uio auio; 581 struct iovec aiov; 582 struct sockaddr *sa = NULL; 583 int error; 584 585 if (uap->to) { 586 error = getsockaddr(&sa, uap->to, uap->tolen); 587 if (error) 588 return (error); 589 } 590 aiov.iov_base = uap->buf; 591 aiov.iov_len = uap->len; 592 auio.uio_iov = &aiov; 593 auio.uio_iovcnt = 1; 594 auio.uio_offset = 0; 595 auio.uio_resid = uap->len; 596 auio.uio_segflg = UIO_USERSPACE; 597 auio.uio_rw = UIO_WRITE; 598 auio.uio_td = td; 599 600 error = kern_sendmsg(uap->s, sa, &auio, NULL, uap->flags, 601 &uap->sysmsg_result); 602 603 if (sa) 604 FREE(sa, M_SONAME); 605 return (error); 606 } 607 608 /* 609 * sendmsg_args(int s, caddr_t msg, int flags) 610 */ 611 int 612 sendmsg(struct sendmsg_args *uap) 613 { 614 struct thread *td = curthread; 615 struct msghdr msg; 616 struct uio auio; 617 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 618 struct sockaddr *sa = NULL; 619 struct mbuf *control = NULL; 620 int error; 621 622 error = copyin(uap->msg, (caddr_t)&msg, sizeof(msg)); 623 if (error) 624 return (error); 625 626 /* 627 * Conditionally copyin msg.msg_name. 628 */ 629 if (msg.msg_name) { 630 error = getsockaddr(&sa, msg.msg_name, msg.msg_namelen); 631 if (error) 632 return (error); 633 } 634 635 /* 636 * Populate auio. 637 */ 638 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen, 639 &auio.uio_resid); 640 if (error) 641 goto cleanup; 642 auio.uio_iov = iov; 643 auio.uio_iovcnt = msg.msg_iovlen; 644 auio.uio_offset = 0; 645 auio.uio_segflg = UIO_USERSPACE; 646 auio.uio_rw = UIO_WRITE; 647 auio.uio_td = td; 648 649 /* 650 * Conditionally copyin msg.msg_control. 651 */ 652 if (msg.msg_control) { 653 if (msg.msg_controllen < sizeof(struct cmsghdr) || 654 msg.msg_controllen > MLEN) { 655 error = EINVAL; 656 goto cleanup; 657 } 658 control = m_get(M_WAIT, MT_CONTROL); 659 if (control == NULL) { 660 error = ENOBUFS; 661 goto cleanup; 662 } 663 control->m_len = msg.msg_controllen; 664 error = copyin(msg.msg_control, mtod(control, caddr_t), 665 msg.msg_controllen); 666 if (error) { 667 m_free(control); 668 goto cleanup; 669 } 670 } 671 672 error = kern_sendmsg(uap->s, sa, &auio, control, uap->flags, 673 &uap->sysmsg_result); 674 675 cleanup: 676 if (sa) 677 FREE(sa, M_SONAME); 678 iovec_free(&iov, aiov); 679 return (error); 680 } 681 682 /* 683 * kern_recvmsg() takes a handle to sa and control. If the handle is non- 684 * null, it returns a dynamically allocated struct sockaddr and an mbuf. 685 * Don't forget to FREE() and m_free() these if they are returned. 686 */ 687 int 688 kern_recvmsg(int s, struct sockaddr **sa, struct uio *auio, 689 struct mbuf **control, int *flags, int *res) 690 { 691 struct thread *td = curthread; 692 struct proc *p = td->td_proc; 693 struct file *fp; 694 int len, error; 695 struct socket *so; 696 #ifdef KTRACE 697 struct iovec *ktriov = NULL; 698 struct uio ktruio; 699 #endif 700 701 error = holdsock(p->p_fd, s, &fp); 702 if (error) 703 return (error); 704 if (auio->uio_resid < 0) { 705 error = EINVAL; 706 goto done; 707 } 708 #ifdef KTRACE 709 if (KTRPOINT(td, KTR_GENIO)) { 710 int iovlen = auio->uio_iovcnt * sizeof (struct iovec); 711 712 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 713 bcopy(auio->uio_iov, ktriov, iovlen); 714 ktruio = *auio; 715 } 716 #endif 717 len = auio->uio_resid; 718 so = (struct socket *)fp->f_data; 719 error = so_pru_soreceive(so, sa, auio, NULL, control, flags); 720 if (error) { 721 if (auio->uio_resid != len && (error == ERESTART || 722 error == EINTR || error == EWOULDBLOCK)) 723 error = 0; 724 } 725 #ifdef KTRACE 726 if (ktriov != NULL) { 727 if (error == 0) { 728 ktruio.uio_iov = ktriov; 729 ktruio.uio_resid = len - auio->uio_resid; 730 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error); 731 } 732 FREE(ktriov, M_TEMP); 733 } 734 #endif 735 if (error == 0) 736 *res = len - auio->uio_resid; 737 done: 738 fdrop(fp, td); 739 return (error); 740 } 741 742 /* 743 * recvfrom_args(int s, caddr_t buf, size_t len, int flags, 744 * caddr_t from, int *fromlenaddr) 745 */ 746 int 747 recvfrom(struct recvfrom_args *uap) 748 { 749 struct thread *td = curthread; 750 struct uio auio; 751 struct iovec aiov; 752 struct sockaddr *sa = NULL; 753 int error, fromlen; 754 755 if (uap->from && uap->fromlenaddr) { 756 error = copyin(uap->fromlenaddr, &fromlen, sizeof(fromlen)); 757 if (error) 758 return (error); 759 if (fromlen < 0) 760 return (EINVAL); 761 } else { 762 fromlen = 0; 763 } 764 aiov.iov_base = uap->buf; 765 aiov.iov_len = uap->len; 766 auio.uio_iov = &aiov; 767 auio.uio_iovcnt = 1; 768 auio.uio_offset = 0; 769 auio.uio_resid = uap->len; 770 auio.uio_segflg = UIO_USERSPACE; 771 auio.uio_rw = UIO_READ; 772 auio.uio_td = td; 773 774 error = kern_recvmsg(uap->s, uap->from ? &sa : NULL, &auio, NULL, 775 &uap->flags, &uap->sysmsg_result); 776 777 if (error == 0 && uap->from) { 778 /* note: sa may still be NULL */ 779 if (sa) { 780 fromlen = MIN(fromlen, sa->sa_len); 781 error = copyout(sa, uap->from, fromlen); 782 } else { 783 fromlen = 0; 784 } 785 if (error == 0) { 786 error = copyout(&fromlen, uap->fromlenaddr, 787 sizeof(fromlen)); 788 } 789 } 790 if (sa) 791 FREE(sa, M_SONAME); 792 793 return (error); 794 } 795 796 /* 797 * recvmsg_args(int s, struct msghdr *msg, int flags) 798 */ 799 int 800 recvmsg(struct recvmsg_args *uap) 801 { 802 struct thread *td = curthread; 803 struct msghdr msg; 804 struct uio auio; 805 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 806 struct mbuf *m, *control = NULL; 807 struct sockaddr *sa = NULL; 808 caddr_t ctlbuf; 809 socklen_t *ufromlenp, *ucontrollenp; 810 int error, fromlen, controllen, len, flags, *uflagsp; 811 812 /* 813 * This copyin handles everything except the iovec. 814 */ 815 error = copyin(uap->msg, &msg, sizeof(msg)); 816 if (error) 817 return (error); 818 819 if (msg.msg_name && msg.msg_namelen < 0) 820 return (EINVAL); 821 if (msg.msg_control && msg.msg_controllen < 0) 822 return (EINVAL); 823 824 ufromlenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr, 825 msg_namelen)); 826 ucontrollenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr, 827 msg_controllen)); 828 uflagsp = (int *)((caddr_t)uap->msg + offsetof(struct msghdr, 829 msg_flags)); 830 831 /* 832 * Populate auio. 833 */ 834 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen, 835 &auio.uio_resid); 836 if (error) 837 return (error); 838 auio.uio_iov = iov; 839 auio.uio_iovcnt = msg.msg_iovlen; 840 auio.uio_offset = 0; 841 auio.uio_segflg = UIO_USERSPACE; 842 auio.uio_rw = UIO_READ; 843 auio.uio_td = td; 844 845 flags = msg.msg_flags; 846 847 error = kern_recvmsg(uap->s, msg.msg_name ? &sa : NULL, &auio, 848 msg.msg_control ? &control : NULL, &flags, &uap->sysmsg_result); 849 850 /* 851 * Conditionally copyout the name and populate the namelen field. 852 */ 853 if (error == 0 && msg.msg_name) { 854 fromlen = MIN(msg.msg_namelen, sa->sa_len); 855 error = copyout(sa, msg.msg_name, fromlen); 856 if (error == 0) 857 error = copyout(&fromlen, ufromlenp, 858 sizeof(*ufromlenp)); 859 } 860 861 /* 862 * Copyout msg.msg_control and msg.msg_controllen. 863 */ 864 if (error == 0 && msg.msg_control) { 865 len = msg.msg_controllen; 866 m = control; 867 ctlbuf = (caddr_t)msg.msg_control; 868 869 while(m && len > 0) { 870 unsigned int tocopy; 871 872 if (len >= m->m_len) { 873 tocopy = m->m_len; 874 } else { 875 msg.msg_flags |= MSG_CTRUNC; 876 tocopy = len; 877 } 878 879 error = copyout(mtod(m, caddr_t), ctlbuf, tocopy); 880 if (error) 881 goto cleanup; 882 883 ctlbuf += tocopy; 884 len -= tocopy; 885 m = m->m_next; 886 } 887 controllen = ctlbuf - (caddr_t)msg.msg_control; 888 error = copyout(&controllen, ucontrollenp, 889 sizeof(*ucontrollenp)); 890 } 891 892 if (error == 0) 893 error = copyout(&flags, uflagsp, sizeof(*uflagsp)); 894 895 cleanup: 896 if (sa) 897 FREE(sa, M_SONAME); 898 iovec_free(&iov, aiov); 899 if (control) 900 m_freem(control); 901 return (error); 902 } 903 904 /* 905 * shutdown_args(int s, int how) 906 */ 907 int 908 kern_shutdown(int s, int how) 909 { 910 struct thread *td = curthread; 911 struct proc *p = td->td_proc; 912 struct file *fp; 913 int error; 914 915 KKASSERT(p); 916 error = holdsock(p->p_fd, s, &fp); 917 if (error) 918 return (error); 919 error = soshutdown((struct socket *)fp->f_data, how); 920 fdrop(fp, td); 921 return(error); 922 } 923 924 int 925 shutdown(struct shutdown_args *uap) 926 { 927 int error; 928 929 error = kern_shutdown(uap->s, uap->how); 930 931 return (error); 932 } 933 934 /* 935 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an 936 * in kernel pointer instead of a userland pointer. This allows us 937 * to manipulate socket options in the emulation code. 938 */ 939 int 940 kern_setsockopt(int s, struct sockopt *sopt) 941 { 942 struct thread *td = curthread; 943 struct proc *p = td->td_proc; 944 struct file *fp; 945 int error; 946 947 if (sopt->sopt_val == 0 && sopt->sopt_valsize != 0) 948 return (EFAULT); 949 if (sopt->sopt_valsize < 0) 950 return (EINVAL); 951 952 error = holdsock(p->p_fd, s, &fp); 953 if (error) 954 return (error); 955 956 error = sosetopt((struct socket *)fp->f_data, sopt); 957 fdrop(fp, td); 958 return (error); 959 } 960 961 /* 962 * setsockopt_args(int s, int level, int name, caddr_t val, int valsize) 963 */ 964 int 965 setsockopt(struct setsockopt_args *uap) 966 { 967 struct thread *td = curthread; 968 struct sockopt sopt; 969 int error; 970 971 sopt.sopt_dir = SOPT_SET; 972 sopt.sopt_level = uap->level; 973 sopt.sopt_name = uap->name; 974 sopt.sopt_val = uap->val; 975 sopt.sopt_valsize = uap->valsize; 976 sopt.sopt_td = td; 977 978 error = kern_setsockopt(uap->s, &sopt); 979 return(error); 980 } 981 982 /* 983 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an 984 * in kernel pointer instead of a userland pointer. This allows us 985 * to manipulate socket options in the emulation code. 986 */ 987 int 988 kern_getsockopt(int s, struct sockopt *sopt) 989 { 990 struct thread *td = curthread; 991 struct proc *p = td->td_proc; 992 struct file *fp; 993 int error; 994 995 if (sopt->sopt_val == 0 && sopt->sopt_valsize != 0) 996 return (EFAULT); 997 if (sopt->sopt_valsize < 0) 998 return (EINVAL); 999 1000 error = holdsock(p->p_fd, s, &fp); 1001 if (error) 1002 return (error); 1003 1004 error = sogetopt((struct socket *)fp->f_data, sopt); 1005 fdrop(fp, td); 1006 return (error); 1007 } 1008 1009 /* 1010 * getsockopt_Args(int s, int level, int name, caddr_t val, int *avalsize) 1011 */ 1012 int 1013 getsockopt(struct getsockopt_args *uap) 1014 { 1015 struct thread *td = curthread; 1016 struct sockopt sopt; 1017 int error, valsize; 1018 1019 if (uap->val) { 1020 error = copyin(uap->avalsize, &valsize, sizeof(valsize)); 1021 if (error) 1022 return (error); 1023 if (valsize < 0) 1024 return (EINVAL); 1025 } else { 1026 valsize = 0; 1027 } 1028 1029 sopt.sopt_dir = SOPT_GET; 1030 sopt.sopt_level = uap->level; 1031 sopt.sopt_name = uap->name; 1032 sopt.sopt_val = uap->val; 1033 sopt.sopt_valsize = valsize; 1034 sopt.sopt_td = td; 1035 1036 error = kern_getsockopt(uap->s, &sopt); 1037 if (error == 0) { 1038 valsize = sopt.sopt_valsize; 1039 error = copyout(&valsize, uap->avalsize, sizeof(valsize)); 1040 } 1041 return (error); 1042 } 1043 1044 /* 1045 * The second argument to kern_getsockname() is a handle to a struct sockaddr. 1046 * This allows kern_getsockname() to return a pointer to an allocated struct 1047 * sockaddr which must be freed later with FREE(). The caller must 1048 * initialize *name to NULL. 1049 */ 1050 int 1051 kern_getsockname(int s, struct sockaddr **name, int *namelen) 1052 { 1053 struct thread *td = curthread; 1054 struct proc *p = td->td_proc; 1055 struct file *fp; 1056 struct socket *so; 1057 struct sockaddr *sa = NULL; 1058 int error; 1059 1060 error = holdsock(p->p_fd, s, &fp); 1061 if (error) 1062 return (error); 1063 if (*namelen < 0) { 1064 fdrop(fp, td); 1065 return (EINVAL); 1066 } 1067 so = (struct socket *)fp->f_data; 1068 error = so_pru_sockaddr(so, &sa); 1069 if (error == 0) { 1070 if (sa == 0) { 1071 *namelen = 0; 1072 } else { 1073 *namelen = MIN(*namelen, sa->sa_len); 1074 *name = sa; 1075 } 1076 } 1077 1078 fdrop(fp, td); 1079 return (error); 1080 } 1081 1082 /* 1083 * getsockname_args(int fdes, caddr_t asa, int *alen) 1084 * 1085 * Get socket name. 1086 */ 1087 int 1088 getsockname(struct getsockname_args *uap) 1089 { 1090 struct sockaddr *sa = NULL; 1091 int error, sa_len; 1092 1093 error = copyin(uap->alen, &sa_len, sizeof(sa_len)); 1094 if (error) 1095 return (error); 1096 1097 error = kern_getsockname(uap->fdes, &sa, &sa_len); 1098 1099 if (error == 0) 1100 error = copyout(sa, uap->asa, sa_len); 1101 if (error == 0) 1102 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen)); 1103 if (sa) 1104 FREE(sa, M_SONAME); 1105 return (error); 1106 } 1107 1108 /* 1109 * The second argument to kern_getpeername() is a handle to a struct sockaddr. 1110 * This allows kern_getpeername() to return a pointer to an allocated struct 1111 * sockaddr which must be freed later with FREE(). The caller must 1112 * initialize *name to NULL. 1113 */ 1114 int 1115 kern_getpeername(int s, struct sockaddr **name, int *namelen) 1116 { 1117 struct thread *td = curthread; 1118 struct proc *p = td->td_proc; 1119 struct file *fp; 1120 struct socket *so; 1121 struct sockaddr *sa = NULL; 1122 int error; 1123 1124 error = holdsock(p->p_fd, s, &fp); 1125 if (error) 1126 return (error); 1127 if (*namelen < 0) { 1128 fdrop(fp, td); 1129 return (EINVAL); 1130 } 1131 so = (struct socket *)fp->f_data; 1132 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1133 fdrop(fp, td); 1134 return (ENOTCONN); 1135 } 1136 error = so_pru_peeraddr(so, &sa); 1137 if (error == 0) { 1138 if (sa == 0) { 1139 *namelen = 0; 1140 } else { 1141 *namelen = MIN(*namelen, sa->sa_len); 1142 *name = sa; 1143 } 1144 } 1145 1146 fdrop(fp, td); 1147 return (error); 1148 } 1149 1150 /* 1151 * getpeername_args(int fdes, caddr_t asa, int *alen) 1152 * 1153 * Get name of peer for connected socket. 1154 */ 1155 int 1156 getpeername(struct getpeername_args *uap) 1157 { 1158 struct sockaddr *sa = NULL; 1159 int error, sa_len; 1160 1161 error = copyin(uap->alen, &sa_len, sizeof(sa_len)); 1162 if (error) 1163 return (error); 1164 1165 error = kern_getpeername(uap->fdes, &sa, &sa_len); 1166 1167 if (error == 0) 1168 error = copyout(sa, uap->asa, sa_len); 1169 if (error == 0) 1170 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen)); 1171 if (sa) 1172 FREE(sa, M_SONAME); 1173 return (error); 1174 } 1175 1176 int 1177 getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len) 1178 { 1179 struct sockaddr *sa; 1180 int error; 1181 1182 *namp = NULL; 1183 if (len > SOCK_MAXADDRLEN) 1184 return ENAMETOOLONG; 1185 if (len < offsetof(struct sockaddr, sa_data[0])) 1186 return EDOM; 1187 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1188 error = copyin(uaddr, sa, len); 1189 if (error) { 1190 FREE(sa, M_SONAME); 1191 } else { 1192 #if BYTE_ORDER != BIG_ENDIAN 1193 /* 1194 * The bind(), connect(), and sendto() syscalls were not 1195 * versioned for COMPAT_43. Thus, this check must stay. 1196 */ 1197 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1198 sa->sa_family = sa->sa_len; 1199 #endif 1200 sa->sa_len = len; 1201 *namp = sa; 1202 } 1203 return error; 1204 } 1205 1206 /* 1207 * holdsock() - load the struct file pointer associated 1208 * with a socket into *fpp. If an error occurs, non-zero 1209 * will be returned and *fpp will be set to NULL. 1210 */ 1211 int 1212 holdsock(fdp, fdes, fpp) 1213 struct filedesc *fdp; 1214 int fdes; 1215 struct file **fpp; 1216 { 1217 struct file *fp = NULL; 1218 int error = 0; 1219 1220 if ((unsigned)fdes >= fdp->fd_nfiles || 1221 (fp = fdp->fd_ofiles[fdes]) == NULL) { 1222 error = EBADF; 1223 } else if (fp->f_type != DTYPE_SOCKET) { 1224 error = ENOTSOCK; 1225 fp = NULL; 1226 } else { 1227 fhold(fp); 1228 } 1229 *fpp = fp; 1230 return(error); 1231 } 1232 1233 /* 1234 * sendfile(2). 1235 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1236 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1237 * 1238 * Send a file specified by 'fd' and starting at 'offset' to a socket 1239 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1240 * nbytes == 0. Optionally add a header and/or trailer to the socket 1241 * output. If specified, write the total number of bytes sent into *sbytes. 1242 * 1243 * In FreeBSD kern/uipc_syscalls.c,v 1.103, a bug was fixed that caused 1244 * the headers to count against the remaining bytes to be sent from 1245 * the file descriptor. We may wish to implement a compatibility syscall 1246 * in the future. 1247 */ 1248 int 1249 sendfile(struct sendfile_args *uap) 1250 { 1251 struct thread *td = curthread; 1252 struct proc *p = td->td_proc; 1253 struct file *fp; 1254 struct filedesc *fdp; 1255 struct vnode *vp = NULL; 1256 struct sf_hdtr hdtr; 1257 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 1258 struct uio auio; 1259 off_t hdtr_size = 0, sbytes; 1260 int error, res; 1261 1262 KKASSERT(p); 1263 fdp = p->p_fd; 1264 1265 /* 1266 * Do argument checking. Must be a regular file in, stream 1267 * type and connected socket out, positive offset. 1268 */ 1269 fp = holdfp(fdp, uap->fd, FREAD); 1270 if (fp == NULL) { 1271 return (EBADF); 1272 } 1273 if (fp->f_type != DTYPE_VNODE) { 1274 fdrop(fp, td); 1275 return (EINVAL); 1276 } 1277 vp = (struct vnode *)fp->f_data; 1278 vref(vp); 1279 fdrop(fp, td); 1280 1281 /* 1282 * If specified, get the pointer to the sf_hdtr struct for 1283 * any headers/trailers. 1284 */ 1285 if (uap->hdtr) { 1286 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1287 if (error) 1288 goto done; 1289 /* 1290 * Send any headers. 1291 */ 1292 if (hdtr.headers) { 1293 error = iovec_copyin(hdtr.headers, &iov, aiov, 1294 hdtr.hdr_cnt, &auio.uio_resid); 1295 if (error) 1296 goto done; 1297 auio.uio_iov = iov; 1298 auio.uio_iovcnt = hdtr.hdr_cnt; 1299 auio.uio_offset = 0; 1300 auio.uio_segflg = UIO_USERSPACE; 1301 auio.uio_rw = UIO_WRITE; 1302 auio.uio_td = td; 1303 1304 error = kern_sendmsg(uap->s, NULL, &auio, NULL, 0, 1305 &res); 1306 1307 iovec_free(&iov, aiov); 1308 if (error) 1309 goto done; 1310 hdtr_size += res; 1311 } 1312 } 1313 1314 error = kern_sendfile(vp, uap->s, uap->offset, uap->nbytes, 1315 &sbytes, uap->flags); 1316 if (error) 1317 goto done; 1318 1319 /* 1320 * Send trailers. Wimp out and use writev(2). 1321 */ 1322 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1323 error = iovec_copyin(hdtr.trailers, &iov, aiov, 1324 hdtr.trl_cnt, &auio.uio_resid); 1325 if (error) 1326 goto done; 1327 auio.uio_iov = iov; 1328 auio.uio_iovcnt = hdtr.trl_cnt; 1329 auio.uio_offset = 0; 1330 auio.uio_segflg = UIO_USERSPACE; 1331 auio.uio_rw = UIO_WRITE; 1332 auio.uio_td = td; 1333 1334 error = kern_sendmsg(uap->s, NULL, &auio, NULL, 0, &res); 1335 1336 iovec_free(&iov, aiov); 1337 if (error) 1338 goto done; 1339 hdtr_size += res; 1340 } 1341 1342 done: 1343 if (uap->sbytes != NULL) { 1344 sbytes += hdtr_size; 1345 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1346 } 1347 if (vp) 1348 vrele(vp); 1349 return (error); 1350 } 1351 1352 int 1353 kern_sendfile(struct vnode *vp, int s, off_t offset, size_t nbytes, 1354 off_t *sbytes, int flags) 1355 { 1356 struct thread *td = curthread; 1357 struct proc *p = td->td_proc; 1358 struct vm_object *obj; 1359 struct socket *so; 1360 struct file *fp; 1361 struct mbuf *m; 1362 struct sf_buf *sf; 1363 struct vm_page *pg; 1364 off_t off, xfsize; 1365 int error = 0; 1366 1367 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1368 error = EINVAL; 1369 goto done; 1370 } 1371 error = holdsock(p->p_fd, s, &fp); 1372 if (error) 1373 goto done; 1374 so = (struct socket *)fp->f_data; 1375 if (so->so_type != SOCK_STREAM) { 1376 error = EINVAL; 1377 goto done; 1378 } 1379 if ((so->so_state & SS_ISCONNECTED) == 0) { 1380 error = ENOTCONN; 1381 goto done; 1382 } 1383 if (offset < 0) { 1384 error = EINVAL; 1385 goto done; 1386 } 1387 1388 *sbytes = 0; 1389 /* 1390 * Protect against multiple writers to the socket. 1391 */ 1392 (void) sblock(&so->so_snd, M_WAITOK); 1393 1394 /* 1395 * Loop through the pages in the file, starting with the requested 1396 * offset. Get a file page (do I/O if necessary), map the file page 1397 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1398 * it on the socket. 1399 */ 1400 for (off = offset; ; off += xfsize, *sbytes += xfsize) { 1401 vm_pindex_t pindex; 1402 vm_offset_t pgoff; 1403 1404 pindex = OFF_TO_IDX(off); 1405 retry_lookup: 1406 /* 1407 * Calculate the amount to transfer. Not to exceed a page, 1408 * the EOF, or the passed in nbytes. 1409 */ 1410 xfsize = obj->un_pager.vnp.vnp_size - off; 1411 if (xfsize > PAGE_SIZE) 1412 xfsize = PAGE_SIZE; 1413 pgoff = (vm_offset_t)(off & PAGE_MASK); 1414 if (PAGE_SIZE - pgoff < xfsize) 1415 xfsize = PAGE_SIZE - pgoff; 1416 if (nbytes && xfsize > (nbytes - *sbytes)) 1417 xfsize = nbytes - *sbytes; 1418 if (xfsize <= 0) 1419 break; 1420 /* 1421 * Optimize the non-blocking case by looking at the socket space 1422 * before going to the extra work of constituting the sf_buf. 1423 */ 1424 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1425 if (so->so_state & SS_CANTSENDMORE) 1426 error = EPIPE; 1427 else 1428 error = EAGAIN; 1429 sbunlock(&so->so_snd); 1430 goto done; 1431 } 1432 /* 1433 * Attempt to look up the page. 1434 * 1435 * Allocate if not found 1436 * 1437 * Wait and loop if busy. 1438 */ 1439 pg = vm_page_lookup(obj, pindex); 1440 1441 if (pg == NULL) { 1442 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1443 if (pg == NULL) { 1444 VM_WAIT; 1445 goto retry_lookup; 1446 } 1447 vm_page_wakeup(pg); 1448 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1449 goto retry_lookup; 1450 } 1451 1452 /* 1453 * Wire the page so it does not get ripped out from under 1454 * us. 1455 */ 1456 1457 vm_page_wire(pg); 1458 1459 /* 1460 * If page is not valid for what we need, initiate I/O 1461 */ 1462 1463 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1464 struct uio auio; 1465 struct iovec aiov; 1466 int bsize; 1467 1468 /* 1469 * Ensure that our page is still around when the I/O 1470 * completes. 1471 */ 1472 vm_page_io_start(pg); 1473 1474 /* 1475 * Get the page from backing store. 1476 */ 1477 bsize = vp->v_mount->mnt_stat.f_iosize; 1478 auio.uio_iov = &aiov; 1479 auio.uio_iovcnt = 1; 1480 aiov.iov_base = 0; 1481 aiov.iov_len = MAXBSIZE; 1482 auio.uio_resid = MAXBSIZE; 1483 auio.uio_offset = trunc_page(off); 1484 auio.uio_segflg = UIO_NOCOPY; 1485 auio.uio_rw = UIO_READ; 1486 auio.uio_td = td; 1487 vn_lock(vp, NULL, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1488 error = VOP_READ(vp, &auio, 1489 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1490 p->p_ucred); 1491 VOP_UNLOCK(vp, NULL, 0, td); 1492 vm_page_flag_clear(pg, PG_ZERO); 1493 vm_page_io_finish(pg); 1494 if (error) { 1495 vm_page_unwire(pg, 0); 1496 /* 1497 * See if anyone else might know about this page. 1498 * If not and it is not valid, then free it. 1499 */ 1500 if (pg->wire_count == 0 && pg->valid == 0 && 1501 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1502 pg->hold_count == 0) { 1503 vm_page_busy(pg); 1504 vm_page_free(pg); 1505 } 1506 sbunlock(&so->so_snd); 1507 goto done; 1508 } 1509 } 1510 1511 1512 /* 1513 * Get a sendfile buf. We usually wait as long as necessary, 1514 * but this wait can be interrupted. 1515 */ 1516 if ((sf = sf_buf_alloc(pg)) == NULL) { 1517 s = splvm(); 1518 vm_page_unwire(pg, 0); 1519 if (pg->wire_count == 0 && pg->object == NULL) 1520 vm_page_free(pg); 1521 splx(s); 1522 sbunlock(&so->so_snd); 1523 error = EINTR; 1524 goto done; 1525 } 1526 1527 /* 1528 * Get an mbuf header and set it up as having external storage. 1529 */ 1530 MGETHDR(m, M_WAIT, MT_DATA); 1531 if (m == NULL) { 1532 error = ENOBUFS; 1533 sf_buf_free((void *)sf->kva, PAGE_SIZE); 1534 sbunlock(&so->so_snd); 1535 goto done; 1536 } 1537 m->m_ext.ext_free = sf_buf_free; 1538 m->m_ext.ext_ref = sf_buf_ref; 1539 m->m_ext.ext_buf = (void *)sf->kva; 1540 m->m_ext.ext_size = PAGE_SIZE; 1541 m->m_data = (char *) sf->kva + pgoff; 1542 m->m_flags |= M_EXT; 1543 m->m_pkthdr.len = m->m_len = xfsize; 1544 /* 1545 * Add the buffer to the socket buffer chain. 1546 */ 1547 s = splnet(); 1548 retry_space: 1549 /* 1550 * Make sure that the socket is still able to take more data. 1551 * CANTSENDMORE being true usually means that the connection 1552 * was closed. so_error is true when an error was sensed after 1553 * a previous send. 1554 * The state is checked after the page mapping and buffer 1555 * allocation above since those operations may block and make 1556 * any socket checks stale. From this point forward, nothing 1557 * blocks before the pru_send (or more accurately, any blocking 1558 * results in a loop back to here to re-check). 1559 */ 1560 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1561 if (so->so_state & SS_CANTSENDMORE) { 1562 error = EPIPE; 1563 } else { 1564 error = so->so_error; 1565 so->so_error = 0; 1566 } 1567 m_freem(m); 1568 sbunlock(&so->so_snd); 1569 splx(s); 1570 goto done; 1571 } 1572 /* 1573 * Wait for socket space to become available. We do this just 1574 * after checking the connection state above in order to avoid 1575 * a race condition with sbwait(). 1576 */ 1577 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1578 if (so->so_state & SS_NBIO) { 1579 m_freem(m); 1580 sbunlock(&so->so_snd); 1581 splx(s); 1582 error = EAGAIN; 1583 goto done; 1584 } 1585 error = sbwait(&so->so_snd); 1586 /* 1587 * An error from sbwait usually indicates that we've 1588 * been interrupted by a signal. If we've sent anything 1589 * then return bytes sent, otherwise return the error. 1590 */ 1591 if (error) { 1592 m_freem(m); 1593 sbunlock(&so->so_snd); 1594 splx(s); 1595 goto done; 1596 } 1597 goto retry_space; 1598 } 1599 error = so_pru_send(so, 0, m, NULL, NULL, td); 1600 splx(s); 1601 if (error) { 1602 sbunlock(&so->so_snd); 1603 goto done; 1604 } 1605 } 1606 sbunlock(&so->so_snd); 1607 1608 done: 1609 if (fp) 1610 fdrop(fp, td); 1611 return (error); 1612 } 1613