1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.65.2.17 2003/04/04 17:11:16 tegge Exp $ 38 * $DragonFly: src/sys/kern/uipc_syscalls.c,v 1.23 2003/12/20 05:53:59 dillon Exp $ 39 */ 40 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/sysproto.h> 47 #include <sys/malloc.h> 48 #include <sys/filedesc.h> 49 #include <sys/event.h> 50 #include <sys/proc.h> 51 #include <sys/fcntl.h> 52 #include <sys/file.h> 53 #include <sys/filio.h> 54 #include <sys/kern_syscall.h> 55 #include <sys/mbuf.h> 56 #include <sys/protosw.h> 57 #include <sys/sfbuf.h> 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/signalvar.h> 61 #include <sys/uio.h> 62 #include <sys/vnode.h> 63 #include <sys/lock.h> 64 #include <sys/mount.h> 65 #ifdef KTRACE 66 #include <sys/ktrace.h> 67 #endif 68 #include <vm/vm.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_page.h> 71 #include <vm/vm_pageout.h> 72 #include <vm/vm_kern.h> 73 #include <vm/vm_extern.h> 74 #include <sys/file2.h> 75 76 /* 77 * System call interface to the socket abstraction. 78 */ 79 80 extern struct fileops socketops; 81 82 /* 83 * socket_args(int domain, int type, int protocol) 84 */ 85 int 86 kern_socket(int domain, int type, int protocol, int *res) 87 { 88 struct thread *td = curthread; 89 struct proc *p = td->td_proc; 90 struct filedesc *fdp; 91 struct socket *so; 92 struct file *fp; 93 int fd, error; 94 95 KKASSERT(p); 96 fdp = p->p_fd; 97 98 error = falloc(p, &fp, &fd); 99 if (error) 100 return (error); 101 fhold(fp); 102 error = socreate(domain, &so, type, protocol, td); 103 if (error) { 104 if (fdp->fd_ofiles[fd] == fp) { 105 fdp->fd_ofiles[fd] = NULL; 106 fdrop(fp, td); 107 } 108 } else { 109 fp->f_data = (caddr_t)so; 110 fp->f_flag = FREAD|FWRITE; 111 fp->f_ops = &socketops; 112 fp->f_type = DTYPE_SOCKET; 113 *res = fd; 114 } 115 fdrop(fp, td); 116 return (error); 117 } 118 119 int 120 socket(struct socket_args *uap) 121 { 122 int error; 123 124 error = kern_socket(uap->domain, uap->type, uap->protocol, 125 &uap->sysmsg_result); 126 127 return (error); 128 } 129 int 130 kern_bind(int s, struct sockaddr *sa) 131 { 132 struct thread *td = curthread; 133 struct proc *p = td->td_proc; 134 struct file *fp; 135 int error; 136 137 KKASSERT(p); 138 error = holdsock(p->p_fd, s, &fp); 139 if (error) 140 return (error); 141 error = sobind((struct socket *)fp->f_data, sa, td); 142 fdrop(fp, td); 143 return (error); 144 } 145 146 /* 147 * bind_args(int s, caddr_t name, int namelen) 148 */ 149 int 150 bind(struct bind_args *uap) 151 { 152 struct sockaddr *sa; 153 int error; 154 155 error = getsockaddr(&sa, uap->name, uap->namelen); 156 if (error) 157 return (error); 158 error = kern_bind(uap->s, sa); 159 FREE(sa, M_SONAME); 160 161 return (error); 162 } 163 164 int 165 kern_listen(int s, int backlog) 166 { 167 struct thread *td = curthread; 168 struct proc *p = td->td_proc; 169 struct file *fp; 170 int error; 171 172 KKASSERT(p); 173 error = holdsock(p->p_fd, s, &fp); 174 if (error) 175 return (error); 176 error = solisten((struct socket *)fp->f_data, backlog, td); 177 fdrop(fp, td); 178 return(error); 179 } 180 181 /* 182 * listen_args(int s, int backlog) 183 */ 184 int 185 listen(struct listen_args *uap) 186 { 187 int error; 188 189 error = kern_listen(uap->s, uap->backlog); 190 return (error); 191 } 192 193 /* 194 * The second argument to kern_accept() is a handle to a struct sockaddr. 195 * This allows kern_accept() to return a pointer to an allocated struct 196 * sockaddr which must be freed later with FREE(). The caller must 197 * initialize *name to NULL. 198 */ 199 int 200 kern_accept(int s, struct sockaddr **name, int *namelen, int *res) 201 { 202 struct thread *td = curthread; 203 struct proc *p = td->td_proc; 204 struct filedesc *fdp = p->p_fd; 205 struct file *lfp = NULL; 206 struct file *nfp = NULL; 207 struct sockaddr *sa; 208 int error, s1; 209 struct socket *head, *so; 210 int fd; 211 u_int fflag; /* type must match fp->f_flag */ 212 int tmp; 213 214 if (name && namelen && *namelen < 0) 215 return (EINVAL); 216 217 error = holdsock(fdp, s, &lfp); 218 if (error) 219 return (error); 220 s1 = splnet(); 221 head = (struct socket *)lfp->f_data; 222 if ((head->so_options & SO_ACCEPTCONN) == 0) { 223 splx(s1); 224 error = EINVAL; 225 goto done; 226 } 227 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 228 if (head->so_state & SS_CANTRCVMORE) { 229 head->so_error = ECONNABORTED; 230 break; 231 } 232 if ((head->so_state & SS_NBIO) != 0) { 233 head->so_error = EWOULDBLOCK; 234 break; 235 } 236 error = tsleep((caddr_t)&head->so_timeo, PCATCH, "accept", 0); 237 if (error) { 238 splx(s1); 239 goto done; 240 } 241 } 242 if (head->so_error) { 243 error = head->so_error; 244 head->so_error = 0; 245 splx(s1); 246 goto done; 247 } 248 249 /* 250 * At this point we know that there is at least one connection 251 * ready to be accepted. Remove it from the queue prior to 252 * allocating the file descriptor for it since falloc() may 253 * block allowing another process to accept the connection 254 * instead. 255 */ 256 so = TAILQ_FIRST(&head->so_comp); 257 TAILQ_REMOVE(&head->so_comp, so, so_list); 258 head->so_qlen--; 259 260 fflag = lfp->f_flag; 261 error = falloc(p, &nfp, &fd); 262 if (error) { 263 /* 264 * Probably ran out of file descriptors. Put the 265 * unaccepted connection back onto the queue and 266 * do another wakeup so some other process might 267 * have a chance at it. 268 */ 269 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 270 head->so_qlen++; 271 wakeup_one(&head->so_timeo); 272 splx(s1); 273 goto done; 274 } 275 fhold(nfp); 276 *res = fd; 277 278 /* connection has been removed from the listen queue */ 279 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 280 281 so->so_state &= ~SS_COMP; 282 so->so_head = NULL; 283 if (head->so_sigio != NULL) 284 fsetown(fgetown(head->so_sigio), &so->so_sigio); 285 286 nfp->f_data = (caddr_t)so; 287 nfp->f_flag = fflag; 288 nfp->f_ops = &socketops; 289 nfp->f_type = DTYPE_SOCKET; 290 /* Sync socket nonblocking/async state with file flags */ 291 tmp = fflag & FNONBLOCK; 292 (void) fo_ioctl(nfp, FIONBIO, (caddr_t)&tmp, td); 293 tmp = fflag & FASYNC; 294 (void) fo_ioctl(nfp, FIOASYNC, (caddr_t)&tmp, td); 295 296 sa = NULL; 297 error = soaccept(so, &sa); 298 299 /* 300 * Set the returned name and namelen as applicable. Set the returned 301 * namelen to 0 for older code which might ignore the return value 302 * from accept. 303 */ 304 if (error == 0) { 305 if (sa && name && namelen) { 306 if (*namelen > sa->sa_len) 307 *namelen = sa->sa_len; 308 *name = sa; 309 } else { 310 if (sa) 311 FREE(sa, M_SONAME); 312 } 313 } 314 315 /* 316 * close the new descriptor, assuming someone hasn't ripped it 317 * out from under us. Note that *res is normally ignored if an 318 * error is returned but a syscall message will still have access 319 * to the result code. 320 */ 321 if (error) { 322 *res = -1; 323 if (fdp->fd_ofiles[fd] == nfp) { 324 fdp->fd_ofiles[fd] = NULL; 325 fdrop(nfp, td); 326 } 327 } 328 splx(s1); 329 330 /* 331 * Release explicitly held references before returning. 332 */ 333 done: 334 if (nfp != NULL) 335 fdrop(nfp, td); 336 fdrop(lfp, td); 337 return (error); 338 } 339 340 /* 341 * accept_args(int s, caddr_t name, int *anamelen) 342 */ 343 int 344 accept(struct accept_args *uap) 345 { 346 struct sockaddr *sa = NULL; 347 int sa_len; 348 int error; 349 350 if (uap->name) { 351 error = copyin(uap->anamelen, &sa_len, sizeof(sa_len)); 352 if (error) 353 return (error); 354 355 error = kern_accept(uap->s, &sa, &sa_len, &uap->sysmsg_result); 356 357 if (error == 0) 358 error = copyout(sa, uap->name, sa_len); 359 if (error == 0) { 360 error = copyout(&sa_len, uap->anamelen, 361 sizeof(*uap->anamelen)); 362 } 363 if (sa) 364 FREE(sa, M_SONAME); 365 } else { 366 error = kern_accept(uap->s, NULL, 0, &uap->sysmsg_result); 367 } 368 return (error); 369 } 370 371 int 372 kern_connect(int s, struct sockaddr *sa) 373 { 374 struct thread *td = curthread; 375 struct proc *p = td->td_proc; 376 struct file *fp; 377 struct socket *so; 378 int error; 379 380 error = holdsock(p->p_fd, s, &fp); 381 if (error) 382 return (error); 383 so = (struct socket *)fp->f_data; 384 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 385 error = EALREADY; 386 goto done; 387 } 388 error = soconnect(so, sa, td); 389 if (error) 390 goto bad; 391 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 392 error = EINPROGRESS; 393 goto done; 394 } 395 s = splnet(); 396 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 397 error = tsleep((caddr_t)&so->so_timeo, PCATCH, "connec", 0); 398 if (error) 399 break; 400 } 401 if (error == 0) { 402 error = so->so_error; 403 so->so_error = 0; 404 } 405 splx(s); 406 bad: 407 so->so_state &= ~SS_ISCONNECTING; 408 if (error == ERESTART) 409 error = EINTR; 410 done: 411 fdrop(fp, td); 412 return (error); 413 } 414 415 /* 416 * connect_args(int s, caddr_t name, int namelen) 417 */ 418 int 419 connect(struct connect_args *uap) 420 { 421 struct sockaddr *sa; 422 int error; 423 424 error = getsockaddr(&sa, uap->name, uap->namelen); 425 if (error) 426 return (error); 427 error = kern_connect(uap->s, sa); 428 FREE(sa, M_SONAME); 429 430 return (error); 431 } 432 433 int 434 kern_socketpair(int domain, int type, int protocol, int *sv) 435 { 436 struct thread *td = curthread; 437 struct proc *p = td->td_proc; 438 struct filedesc *fdp; 439 struct file *fp1, *fp2; 440 struct socket *so1, *so2; 441 int fd, error; 442 443 KKASSERT(p); 444 fdp = p->p_fd; 445 error = socreate(domain, &so1, type, protocol, td); 446 if (error) 447 return (error); 448 error = socreate(domain, &so2, type, protocol, td); 449 if (error) 450 goto free1; 451 error = falloc(p, &fp1, &fd); 452 if (error) 453 goto free2; 454 fhold(fp1); 455 sv[0] = fd; 456 fp1->f_data = (caddr_t)so1; 457 error = falloc(p, &fp2, &fd); 458 if (error) 459 goto free3; 460 fhold(fp2); 461 fp2->f_data = (caddr_t)so2; 462 sv[1] = fd; 463 error = soconnect2(so1, so2); 464 if (error) 465 goto free4; 466 if (type == SOCK_DGRAM) { 467 /* 468 * Datagram socket connection is asymmetric. 469 */ 470 error = soconnect2(so2, so1); 471 if (error) 472 goto free4; 473 } 474 fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 475 fp1->f_ops = fp2->f_ops = &socketops; 476 fp1->f_type = fp2->f_type = DTYPE_SOCKET; 477 fdrop(fp1, td); 478 fdrop(fp2, td); 479 return (error); 480 free4: 481 if (fdp->fd_ofiles[sv[1]] == fp2) { 482 fdp->fd_ofiles[sv[1]] = NULL; 483 fdrop(fp2, td); 484 } 485 fdrop(fp2, td); 486 free3: 487 if (fdp->fd_ofiles[sv[0]] == fp1) { 488 fdp->fd_ofiles[sv[0]] = NULL; 489 fdrop(fp1, td); 490 } 491 fdrop(fp1, td); 492 free2: 493 (void)soclose(so2); 494 free1: 495 (void)soclose(so1); 496 return (error); 497 } 498 499 /* 500 * socketpair(int domain, int type, int protocol, int *rsv) 501 */ 502 int 503 socketpair(struct socketpair_args *uap) 504 { 505 int error, sockv[2]; 506 507 error = kern_socketpair(uap->domain, uap->type, uap->protocol, sockv); 508 509 if (error == 0) 510 error = copyout(sockv, uap->rsv, sizeof(sockv)); 511 return (error); 512 } 513 514 int 515 kern_sendmsg(int s, struct sockaddr *sa, struct uio *auio, 516 struct mbuf *control, int flags, int *res) 517 { 518 struct thread *td = curthread; 519 struct proc *p = td->td_proc; 520 struct file *fp; 521 int len, error; 522 struct socket *so; 523 #ifdef KTRACE 524 struct iovec *ktriov = NULL; 525 struct uio ktruio; 526 #endif 527 528 error = holdsock(p->p_fd, s, &fp); 529 if (error) 530 return (error); 531 if (auio->uio_resid < 0) { 532 error = EINVAL; 533 goto done; 534 } 535 #ifdef KTRACE 536 if (KTRPOINT(td, KTR_GENIO)) { 537 int iovlen = auio->uio_iovcnt * sizeof (struct iovec); 538 539 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 540 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen); 541 ktruio = *auio; 542 } 543 #endif 544 len = auio->uio_resid; 545 so = (struct socket *)fp->f_data; 546 error = so->so_proto->pr_usrreqs->pru_sosend(so, sa, auio, NULL, 547 control, flags, td); 548 if (error) { 549 if (auio->uio_resid != len && (error == ERESTART || 550 error == EINTR || error == EWOULDBLOCK)) 551 error = 0; 552 if (error == EPIPE) 553 psignal(p, SIGPIPE); 554 } 555 #ifdef KTRACE 556 if (ktriov != NULL) { 557 if (error == 0) { 558 ktruio.uio_iov = ktriov; 559 ktruio.uio_resid = len - auio->uio_resid; 560 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error); 561 } 562 FREE(ktriov, M_TEMP); 563 } 564 #endif 565 if (error == 0) 566 *res = len - auio->uio_resid; 567 done: 568 fdrop(fp, td); 569 return (error); 570 } 571 572 /* 573 * sendto_args(int s, caddr_t buf, size_t len, int flags, caddr_t to, int tolen) 574 */ 575 int 576 sendto(struct sendto_args *uap) 577 { 578 struct thread *td = curthread; 579 struct uio auio; 580 struct iovec aiov; 581 struct sockaddr *sa = NULL; 582 int error; 583 584 if (uap->to) { 585 error = getsockaddr(&sa, uap->to, uap->tolen); 586 if (error) 587 return (error); 588 } 589 aiov.iov_base = uap->buf; 590 aiov.iov_len = uap->len; 591 auio.uio_iov = &aiov; 592 auio.uio_iovcnt = 1; 593 auio.uio_offset = 0; 594 auio.uio_resid = uap->len; 595 auio.uio_segflg = UIO_USERSPACE; 596 auio.uio_rw = UIO_WRITE; 597 auio.uio_td = td; 598 599 error = kern_sendmsg(uap->s, sa, &auio, NULL, uap->flags, 600 &uap->sysmsg_result); 601 602 if (sa) 603 FREE(sa, M_SONAME); 604 return (error); 605 } 606 607 /* 608 * sendmsg_args(int s, caddr_t msg, int flags) 609 */ 610 int 611 sendmsg(struct sendmsg_args *uap) 612 { 613 struct thread *td = curthread; 614 struct msghdr msg; 615 struct uio auio; 616 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 617 struct sockaddr *sa = NULL; 618 struct mbuf *control = NULL; 619 int error; 620 621 error = copyin(uap->msg, (caddr_t)&msg, sizeof(msg)); 622 if (error) 623 return (error); 624 625 /* 626 * Conditionally copyin msg.msg_name. 627 */ 628 if (msg.msg_name) { 629 error = getsockaddr(&sa, msg.msg_name, msg.msg_namelen); 630 if (error) 631 return (error); 632 } 633 634 /* 635 * Populate auio. 636 */ 637 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen, 638 &auio.uio_resid); 639 if (error) 640 goto cleanup; 641 auio.uio_iov = iov; 642 auio.uio_iovcnt = msg.msg_iovlen; 643 auio.uio_offset = 0; 644 auio.uio_segflg = UIO_USERSPACE; 645 auio.uio_rw = UIO_WRITE; 646 auio.uio_td = td; 647 648 /* 649 * Conditionally copyin msg.msg_control. 650 */ 651 if (msg.msg_control) { 652 if (msg.msg_controllen < sizeof(struct cmsghdr) || 653 msg.msg_controllen > MLEN) { 654 error = EINVAL; 655 goto cleanup; 656 } 657 control = m_get(M_WAIT, MT_CONTROL); 658 if (control == NULL) { 659 error = ENOBUFS; 660 goto cleanup; 661 } 662 control->m_len = msg.msg_controllen; 663 error = copyin(msg.msg_control, mtod(control, caddr_t), 664 msg.msg_controllen); 665 if (error) { 666 m_free(control); 667 goto cleanup; 668 } 669 } 670 671 error = kern_sendmsg(uap->s, sa, &auio, control, uap->flags, 672 &uap->sysmsg_result); 673 674 cleanup: 675 if (sa) 676 FREE(sa, M_SONAME); 677 iovec_free(&iov, aiov); 678 return (error); 679 } 680 681 /* 682 * kern_recvmsg() takes a handle to sa and control. If the handle is non- 683 * null, it returns a dynamically allocated struct sockaddr and an mbuf. 684 * Don't forget to FREE() and m_free() these if they are returned. 685 */ 686 int 687 kern_recvmsg(int s, struct sockaddr **sa, struct uio *auio, 688 struct mbuf **control, int *flags, int *res) 689 { 690 struct thread *td = curthread; 691 struct proc *p = td->td_proc; 692 struct file *fp; 693 int len, error; 694 struct socket *so; 695 #ifdef KTRACE 696 struct iovec *ktriov = NULL; 697 struct uio ktruio; 698 #endif 699 700 error = holdsock(p->p_fd, s, &fp); 701 if (error) 702 return (error); 703 if (auio->uio_resid < 0) { 704 error = EINVAL; 705 goto done; 706 } 707 #ifdef KTRACE 708 if (KTRPOINT(td, KTR_GENIO)) { 709 int iovlen = auio->uio_iovcnt * sizeof (struct iovec); 710 711 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 712 bcopy(auio->uio_iov, ktriov, iovlen); 713 ktruio = *auio; 714 } 715 #endif 716 len = auio->uio_resid; 717 so = (struct socket *)fp->f_data; 718 error = so->so_proto->pr_usrreqs->pru_soreceive(so, sa, auio, NULL, 719 control, flags); 720 if (error) { 721 if (auio->uio_resid != len && (error == ERESTART || 722 error == EINTR || error == EWOULDBLOCK)) 723 error = 0; 724 } 725 #ifdef KTRACE 726 if (ktriov != NULL) { 727 if (error == 0) { 728 ktruio.uio_iov = ktriov; 729 ktruio.uio_resid = len - auio->uio_resid; 730 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error); 731 } 732 FREE(ktriov, M_TEMP); 733 } 734 #endif 735 if (error == 0) 736 *res = len - auio->uio_resid; 737 done: 738 fdrop(fp, td); 739 return (error); 740 } 741 742 /* 743 * recvfrom_args(int s, caddr_t buf, size_t len, int flags, 744 * caddr_t from, int *fromlenaddr) 745 */ 746 int 747 recvfrom(struct recvfrom_args *uap) 748 { 749 struct thread *td = curthread; 750 struct uio auio; 751 struct iovec aiov; 752 struct sockaddr *sa = NULL; 753 int error, fromlen; 754 755 if (uap->from && uap->fromlenaddr) { 756 error = copyin(uap->fromlenaddr, &fromlen, sizeof(fromlen)); 757 if (error) 758 return (error); 759 if (fromlen < 0) 760 return (EINVAL); 761 } else { 762 fromlen = 0; 763 } 764 aiov.iov_base = uap->buf; 765 aiov.iov_len = uap->len; 766 auio.uio_iov = &aiov; 767 auio.uio_iovcnt = 1; 768 auio.uio_offset = 0; 769 auio.uio_resid = uap->len; 770 auio.uio_segflg = UIO_USERSPACE; 771 auio.uio_rw = UIO_READ; 772 auio.uio_td = td; 773 774 error = kern_recvmsg(uap->s, uap->from ? &sa : NULL, &auio, NULL, 775 &uap->flags, &uap->sysmsg_result); 776 777 if (error == 0 && uap->from) { 778 /* note: sa may still be NULL */ 779 if (sa) { 780 fromlen = MIN(fromlen, sa->sa_len); 781 error = copyout(sa, uap->from, fromlen); 782 } else { 783 fromlen = 0; 784 } 785 if (error == 0) { 786 error = copyout(&fromlen, uap->fromlenaddr, 787 sizeof(fromlen)); 788 } 789 } 790 if (sa) 791 FREE(sa, M_SONAME); 792 793 return (error); 794 } 795 796 /* 797 * recvmsg_args(int s, struct msghdr *msg, int flags) 798 */ 799 int 800 recvmsg(struct recvmsg_args *uap) 801 { 802 struct thread *td = curthread; 803 struct msghdr msg; 804 struct uio auio; 805 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 806 struct mbuf *m, *control = NULL; 807 struct sockaddr *sa = NULL; 808 caddr_t ctlbuf; 809 socklen_t *ufromlenp, *ucontrollenp; 810 int error, fromlen, controllen, len, flags, *uflagsp; 811 812 /* 813 * This copyin handles everything except the iovec. 814 */ 815 error = copyin(uap->msg, &msg, sizeof(msg)); 816 if (error) 817 return (error); 818 819 if (msg.msg_name && msg.msg_namelen < 0) 820 return (EINVAL); 821 if (msg.msg_control && msg.msg_controllen < 0) 822 return (EINVAL); 823 824 ufromlenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr, 825 msg_namelen)); 826 ucontrollenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr, 827 msg_controllen)); 828 uflagsp = (int *)((caddr_t)uap->msg + offsetof(struct msghdr, 829 msg_flags)); 830 831 /* 832 * Populate auio. 833 */ 834 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen, 835 &auio.uio_resid); 836 if (error) 837 return (error); 838 auio.uio_iov = iov; 839 auio.uio_iovcnt = msg.msg_iovlen; 840 auio.uio_offset = 0; 841 auio.uio_segflg = UIO_USERSPACE; 842 auio.uio_rw = UIO_READ; 843 auio.uio_td = td; 844 845 flags = msg.msg_flags; 846 847 error = kern_recvmsg(uap->s, msg.msg_name ? &sa : NULL, &auio, 848 msg.msg_control ? &control : NULL, &flags, &uap->sysmsg_result); 849 850 /* 851 * Conditionally copyout the name and populate the namelen field. 852 */ 853 if (error == 0 && msg.msg_name) { 854 fromlen = MIN(msg.msg_namelen, sa->sa_len); 855 error = copyout(sa, msg.msg_name, fromlen); 856 if (error == 0) 857 error = copyout(&fromlen, ufromlenp, 858 sizeof(*ufromlenp)); 859 } 860 861 /* 862 * Copyout msg.msg_control and msg.msg_controllen. 863 */ 864 if (error == 0 && msg.msg_control) { 865 len = msg.msg_controllen; 866 m = control; 867 ctlbuf = (caddr_t)msg.msg_control; 868 869 while(m && len > 0) { 870 unsigned int tocopy; 871 872 if (len >= m->m_len) { 873 tocopy = m->m_len; 874 } else { 875 msg.msg_flags |= MSG_CTRUNC; 876 tocopy = len; 877 } 878 879 error = copyout(mtod(m, caddr_t), ctlbuf, tocopy); 880 if (error) 881 goto cleanup; 882 883 ctlbuf += tocopy; 884 len -= tocopy; 885 m = m->m_next; 886 } 887 controllen = ctlbuf - (caddr_t)msg.msg_control; 888 error = copyout(&controllen, ucontrollenp, 889 sizeof(*ucontrollenp)); 890 } 891 892 if (error == 0) 893 error = copyout(&flags, uflagsp, sizeof(*uflagsp)); 894 895 cleanup: 896 if (sa) 897 FREE(sa, M_SONAME); 898 iovec_free(&iov, aiov); 899 if (control) 900 m_freem(control); 901 return (error); 902 } 903 904 /* 905 * shutdown_args(int s, int how) 906 */ 907 int 908 kern_shutdown(int s, int how) 909 { 910 struct thread *td = curthread; 911 struct proc *p = td->td_proc; 912 struct file *fp; 913 int error; 914 915 KKASSERT(p); 916 error = holdsock(p->p_fd, s, &fp); 917 if (error) 918 return (error); 919 error = soshutdown((struct socket *)fp->f_data, how); 920 fdrop(fp, td); 921 return(error); 922 } 923 924 int 925 shutdown(struct shutdown_args *uap) 926 { 927 int error; 928 929 error = kern_shutdown(uap->s, uap->how); 930 931 return (error); 932 } 933 934 /* 935 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an 936 * in kernel pointer instead of a userland pointer. This allows us 937 * to manipulate socket options in the emulation code. 938 */ 939 int 940 kern_setsockopt(int s, struct sockopt *sopt) 941 { 942 struct thread *td = curthread; 943 struct proc *p = td->td_proc; 944 struct file *fp; 945 int error; 946 947 if (sopt->sopt_val == 0 && sopt->sopt_valsize != 0) 948 return (EFAULT); 949 if (sopt->sopt_valsize < 0) 950 return (EINVAL); 951 952 error = holdsock(p->p_fd, s, &fp); 953 if (error) 954 return (error); 955 956 error = sosetopt((struct socket *)fp->f_data, sopt); 957 fdrop(fp, td); 958 return (error); 959 } 960 961 /* 962 * setsockopt_args(int s, int level, int name, caddr_t val, int valsize) 963 */ 964 int 965 setsockopt(struct setsockopt_args *uap) 966 { 967 struct thread *td = curthread; 968 struct sockopt sopt; 969 int error; 970 971 sopt.sopt_dir = SOPT_SET; 972 sopt.sopt_level = uap->level; 973 sopt.sopt_name = uap->name; 974 sopt.sopt_val = uap->val; 975 sopt.sopt_valsize = uap->valsize; 976 sopt.sopt_td = td; 977 978 error = kern_setsockopt(uap->s, &sopt); 979 return(error); 980 } 981 982 /* 983 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an 984 * in kernel pointer instead of a userland pointer. This allows us 985 * to manipulate socket options in the emulation code. 986 */ 987 int 988 kern_getsockopt(int s, struct sockopt *sopt) 989 { 990 struct thread *td = curthread; 991 struct proc *p = td->td_proc; 992 struct file *fp; 993 int error; 994 995 if (sopt->sopt_val == 0 && sopt->sopt_valsize != 0) 996 return (EFAULT); 997 if (sopt->sopt_valsize < 0) 998 return (EINVAL); 999 1000 error = holdsock(p->p_fd, s, &fp); 1001 if (error) 1002 return (error); 1003 1004 error = sogetopt((struct socket *)fp->f_data, sopt); 1005 fdrop(fp, td); 1006 return (error); 1007 } 1008 1009 /* 1010 * getsockopt_Args(int s, int level, int name, caddr_t val, int *avalsize) 1011 */ 1012 int 1013 getsockopt(struct getsockopt_args *uap) 1014 { 1015 struct thread *td = curthread; 1016 struct sockopt sopt; 1017 int error, valsize; 1018 1019 if (uap->val) { 1020 error = copyin(uap->avalsize, &valsize, sizeof(valsize)); 1021 if (error) 1022 return (error); 1023 if (valsize < 0) 1024 return (EINVAL); 1025 } else { 1026 valsize = 0; 1027 } 1028 1029 sopt.sopt_dir = SOPT_GET; 1030 sopt.sopt_level = uap->level; 1031 sopt.sopt_name = uap->name; 1032 sopt.sopt_val = uap->val; 1033 sopt.sopt_valsize = valsize; 1034 sopt.sopt_td = td; 1035 1036 error = kern_getsockopt(uap->s, &sopt); 1037 if (error == 0) { 1038 valsize = sopt.sopt_valsize; 1039 error = copyout(&valsize, uap->avalsize, sizeof(valsize)); 1040 } 1041 return (error); 1042 } 1043 1044 /* 1045 * The second argument to kern_getsockname() is a handle to a struct sockaddr. 1046 * This allows kern_getsockname() to return a pointer to an allocated struct 1047 * sockaddr which must be freed later with FREE(). The caller must 1048 * initialize *name to NULL. 1049 */ 1050 int 1051 kern_getsockname(int s, struct sockaddr **name, int *namelen) 1052 { 1053 struct thread *td = curthread; 1054 struct proc *p = td->td_proc; 1055 struct file *fp; 1056 struct socket *so; 1057 struct sockaddr *sa = NULL; 1058 int error; 1059 1060 error = holdsock(p->p_fd, s, &fp); 1061 if (error) 1062 return (error); 1063 if (*namelen < 0) { 1064 fdrop(fp, td); 1065 return (EINVAL); 1066 } 1067 so = (struct socket *)fp->f_data; 1068 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1069 if (error == 0) { 1070 if (sa == 0) { 1071 *namelen = 0; 1072 } else { 1073 *namelen = MIN(*namelen, sa->sa_len); 1074 *name = sa; 1075 } 1076 } 1077 1078 fdrop(fp, td); 1079 return (error); 1080 } 1081 1082 /* 1083 * getsockname_args(int fdes, caddr_t asa, int *alen) 1084 * 1085 * Get socket name. 1086 */ 1087 int 1088 getsockname(struct getsockname_args *uap) 1089 { 1090 struct sockaddr *sa = NULL; 1091 int error, sa_len; 1092 1093 error = copyin(uap->alen, &sa_len, sizeof(sa_len)); 1094 if (error) 1095 return (error); 1096 1097 error = kern_getsockname(uap->fdes, &sa, &sa_len); 1098 1099 if (error == 0) 1100 error = copyout(sa, uap->asa, sa_len); 1101 if (error == 0) 1102 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen)); 1103 if (sa) 1104 FREE(sa, M_SONAME); 1105 return (error); 1106 } 1107 1108 /* 1109 * The second argument to kern_getpeername() is a handle to a struct sockaddr. 1110 * This allows kern_getpeername() to return a pointer to an allocated struct 1111 * sockaddr which must be freed later with FREE(). The caller must 1112 * initialize *name to NULL. 1113 */ 1114 int 1115 kern_getpeername(int s, struct sockaddr **name, int *namelen) 1116 { 1117 struct thread *td = curthread; 1118 struct proc *p = td->td_proc; 1119 struct file *fp; 1120 struct socket *so; 1121 struct sockaddr *sa = NULL; 1122 int error; 1123 1124 error = holdsock(p->p_fd, s, &fp); 1125 if (error) 1126 return (error); 1127 if (*namelen < 0) { 1128 fdrop(fp, td); 1129 return (EINVAL); 1130 } 1131 so = (struct socket *)fp->f_data; 1132 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1133 fdrop(fp, td); 1134 return (ENOTCONN); 1135 } 1136 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1137 if (error == 0) { 1138 if (sa == 0) { 1139 *namelen = 0; 1140 } else { 1141 *namelen = MIN(*namelen, sa->sa_len); 1142 *name = sa; 1143 } 1144 } 1145 1146 fdrop(fp, td); 1147 return (error); 1148 } 1149 1150 /* 1151 * getpeername_args(int fdes, caddr_t asa, int *alen) 1152 * 1153 * Get name of peer for connected socket. 1154 */ 1155 int 1156 getpeername(struct getpeername_args *uap) 1157 { 1158 struct sockaddr *sa = NULL; 1159 int error, sa_len; 1160 1161 error = copyin(uap->alen, &sa_len, sizeof(sa_len)); 1162 if (error) 1163 return (error); 1164 1165 error = kern_getpeername(uap->fdes, &sa, &sa_len); 1166 1167 if (error == 0) 1168 error = copyout(sa, uap->asa, sa_len); 1169 if (error == 0) 1170 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen)); 1171 if (sa) 1172 FREE(sa, M_SONAME); 1173 return (error); 1174 } 1175 1176 int 1177 getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len) 1178 { 1179 struct sockaddr *sa; 1180 int error; 1181 1182 *namp = NULL; 1183 if (len > SOCK_MAXADDRLEN) 1184 return ENAMETOOLONG; 1185 if (len < offsetof(struct sockaddr, sa_data[0])) 1186 return EDOM; 1187 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1188 error = copyin(uaddr, sa, len); 1189 if (error) { 1190 FREE(sa, M_SONAME); 1191 } else { 1192 #if BYTE_ORDER != BIG_ENDIAN 1193 /* 1194 * The bind(), connect(), and sendto() syscalls were not 1195 * versioned for COMPAT_43. Thus, this check must stay. 1196 */ 1197 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1198 sa->sa_family = sa->sa_len; 1199 #endif 1200 sa->sa_len = len; 1201 *namp = sa; 1202 } 1203 return error; 1204 } 1205 1206 /* 1207 * holdsock() - load the struct file pointer associated 1208 * with a socket into *fpp. If an error occurs, non-zero 1209 * will be returned and *fpp will be set to NULL. 1210 */ 1211 int 1212 holdsock(fdp, fdes, fpp) 1213 struct filedesc *fdp; 1214 int fdes; 1215 struct file **fpp; 1216 { 1217 struct file *fp = NULL; 1218 int error = 0; 1219 1220 if ((unsigned)fdes >= fdp->fd_nfiles || 1221 (fp = fdp->fd_ofiles[fdes]) == NULL) { 1222 error = EBADF; 1223 } else if (fp->f_type != DTYPE_SOCKET) { 1224 error = ENOTSOCK; 1225 fp = NULL; 1226 } else { 1227 fhold(fp); 1228 } 1229 *fpp = fp; 1230 return(error); 1231 } 1232 1233 /* 1234 * sendfile(2). 1235 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1236 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1237 * 1238 * Send a file specified by 'fd' and starting at 'offset' to a socket 1239 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1240 * nbytes == 0. Optionally add a header and/or trailer to the socket 1241 * output. If specified, write the total number of bytes sent into *sbytes. 1242 * 1243 * In FreeBSD kern/uipc_syscalls.c,v 1.103, a bug was fixed that caused 1244 * the headers to count against the remaining bytes to be sent from 1245 * the file descriptor. We may wish to implement a compatibility syscall 1246 * in the future. 1247 */ 1248 int 1249 sendfile(struct sendfile_args *uap) 1250 { 1251 struct thread *td = curthread; 1252 struct proc *p = td->td_proc; 1253 struct file *fp; 1254 struct filedesc *fdp; 1255 struct vnode *vp = NULL; 1256 struct sf_hdtr hdtr; 1257 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 1258 struct uio auio; 1259 off_t hdtr_size = 0, sbytes; 1260 int error, res; 1261 1262 KKASSERT(p); 1263 fdp = p->p_fd; 1264 1265 /* 1266 * Do argument checking. Must be a regular file in, stream 1267 * type and connected socket out, positive offset. 1268 */ 1269 fp = holdfp(fdp, uap->fd, FREAD); 1270 if (fp == NULL) { 1271 return (EBADF); 1272 } 1273 if (fp->f_type != DTYPE_VNODE) { 1274 fdrop(fp, td); 1275 return (EINVAL); 1276 } 1277 vp = (struct vnode *)fp->f_data; 1278 vref(vp); 1279 fdrop(fp, td); 1280 1281 /* 1282 * If specified, get the pointer to the sf_hdtr struct for 1283 * any headers/trailers. 1284 */ 1285 if (uap->hdtr) { 1286 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1287 if (error) 1288 goto done; 1289 /* 1290 * Send any headers. 1291 */ 1292 if (hdtr.headers) { 1293 error = iovec_copyin(hdtr.headers, &iov, aiov, 1294 hdtr.hdr_cnt, &auio.uio_resid); 1295 if (error) 1296 goto done; 1297 auio.uio_iov = iov; 1298 auio.uio_iovcnt = hdtr.hdr_cnt; 1299 auio.uio_offset = 0; 1300 auio.uio_segflg = UIO_USERSPACE; 1301 auio.uio_rw = UIO_WRITE; 1302 auio.uio_td = td; 1303 1304 error = kern_sendmsg(uap->s, NULL, &auio, NULL, 0, 1305 &res); 1306 1307 iovec_free(&iov, aiov); 1308 if (error) 1309 goto done; 1310 hdtr_size += res; 1311 } 1312 } 1313 1314 error = kern_sendfile(vp, uap->s, uap->offset, uap->nbytes, 1315 &sbytes, uap->flags); 1316 if (error) 1317 goto done; 1318 1319 /* 1320 * Send trailers. Wimp out and use writev(2). 1321 */ 1322 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1323 error = iovec_copyin(hdtr.trailers, &iov, aiov, 1324 hdtr.trl_cnt, &auio.uio_resid); 1325 if (error) 1326 goto done; 1327 auio.uio_iov = iov; 1328 auio.uio_iovcnt = hdtr.trl_cnt; 1329 auio.uio_offset = 0; 1330 auio.uio_segflg = UIO_USERSPACE; 1331 auio.uio_rw = UIO_WRITE; 1332 auio.uio_td = td; 1333 1334 error = kern_sendmsg(uap->s, NULL, &auio, NULL, 0, &res); 1335 1336 iovec_free(&iov, aiov); 1337 if (error) 1338 goto done; 1339 hdtr_size += res; 1340 } 1341 1342 done: 1343 if (uap->sbytes != NULL) { 1344 sbytes += hdtr_size; 1345 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1346 } 1347 if (vp) 1348 vrele(vp); 1349 return (error); 1350 } 1351 1352 int 1353 kern_sendfile(struct vnode *vp, int s, off_t offset, size_t nbytes, 1354 off_t *sbytes, int flags) 1355 { 1356 struct thread *td = curthread; 1357 struct proc *p = td->td_proc; 1358 struct vm_object *obj; 1359 struct socket *so; 1360 struct file *fp; 1361 struct mbuf *m; 1362 struct sf_buf *sf; 1363 struct vm_page *pg; 1364 off_t off, xfsize; 1365 int error = 0; 1366 1367 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1368 error = EINVAL; 1369 goto done; 1370 } 1371 error = holdsock(p->p_fd, s, &fp); 1372 if (error) 1373 goto done; 1374 so = (struct socket *)fp->f_data; 1375 if (so->so_type != SOCK_STREAM) { 1376 error = EINVAL; 1377 goto done; 1378 } 1379 if ((so->so_state & SS_ISCONNECTED) == 0) { 1380 error = ENOTCONN; 1381 goto done; 1382 } 1383 if (offset < 0) { 1384 error = EINVAL; 1385 goto done; 1386 } 1387 1388 *sbytes = 0; 1389 /* 1390 * Protect against multiple writers to the socket. 1391 */ 1392 (void) sblock(&so->so_snd, M_WAITOK); 1393 1394 /* 1395 * Loop through the pages in the file, starting with the requested 1396 * offset. Get a file page (do I/O if necessary), map the file page 1397 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1398 * it on the socket. 1399 */ 1400 for (off = offset; ; off += xfsize, *sbytes += xfsize) { 1401 vm_pindex_t pindex; 1402 vm_offset_t pgoff; 1403 1404 pindex = OFF_TO_IDX(off); 1405 retry_lookup: 1406 /* 1407 * Calculate the amount to transfer. Not to exceed a page, 1408 * the EOF, or the passed in nbytes. 1409 */ 1410 xfsize = obj->un_pager.vnp.vnp_size - off; 1411 if (xfsize > PAGE_SIZE) 1412 xfsize = PAGE_SIZE; 1413 pgoff = (vm_offset_t)(off & PAGE_MASK); 1414 if (PAGE_SIZE - pgoff < xfsize) 1415 xfsize = PAGE_SIZE - pgoff; 1416 if (nbytes && xfsize > (nbytes - *sbytes)) 1417 xfsize = nbytes - *sbytes; 1418 if (xfsize <= 0) 1419 break; 1420 /* 1421 * Optimize the non-blocking case by looking at the socket space 1422 * before going to the extra work of constituting the sf_buf. 1423 */ 1424 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1425 if (so->so_state & SS_CANTSENDMORE) 1426 error = EPIPE; 1427 else 1428 error = EAGAIN; 1429 sbunlock(&so->so_snd); 1430 goto done; 1431 } 1432 /* 1433 * Attempt to look up the page. 1434 * 1435 * Allocate if not found 1436 * 1437 * Wait and loop if busy. 1438 */ 1439 pg = vm_page_lookup(obj, pindex); 1440 1441 if (pg == NULL) { 1442 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1443 if (pg == NULL) { 1444 VM_WAIT; 1445 goto retry_lookup; 1446 } 1447 vm_page_wakeup(pg); 1448 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1449 goto retry_lookup; 1450 } 1451 1452 /* 1453 * Wire the page so it does not get ripped out from under 1454 * us. 1455 */ 1456 1457 vm_page_wire(pg); 1458 1459 /* 1460 * If page is not valid for what we need, initiate I/O 1461 */ 1462 1463 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1464 struct uio auio; 1465 struct iovec aiov; 1466 int bsize; 1467 1468 /* 1469 * Ensure that our page is still around when the I/O 1470 * completes. 1471 */ 1472 vm_page_io_start(pg); 1473 1474 /* 1475 * Get the page from backing store. 1476 */ 1477 bsize = vp->v_mount->mnt_stat.f_iosize; 1478 auio.uio_iov = &aiov; 1479 auio.uio_iovcnt = 1; 1480 aiov.iov_base = 0; 1481 aiov.iov_len = MAXBSIZE; 1482 auio.uio_resid = MAXBSIZE; 1483 auio.uio_offset = trunc_page(off); 1484 auio.uio_segflg = UIO_NOCOPY; 1485 auio.uio_rw = UIO_READ; 1486 auio.uio_td = td; 1487 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1488 error = VOP_READ(vp, &auio, 1489 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1490 p->p_ucred); 1491 VOP_UNLOCK(vp, 0, td); 1492 vm_page_flag_clear(pg, PG_ZERO); 1493 vm_page_io_finish(pg); 1494 if (error) { 1495 vm_page_unwire(pg, 0); 1496 /* 1497 * See if anyone else might know about this page. 1498 * If not and it is not valid, then free it. 1499 */ 1500 if (pg->wire_count == 0 && pg->valid == 0 && 1501 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1502 pg->hold_count == 0) { 1503 vm_page_busy(pg); 1504 vm_page_free(pg); 1505 } 1506 sbunlock(&so->so_snd); 1507 goto done; 1508 } 1509 } 1510 1511 1512 /* 1513 * Get a sendfile buf. We usually wait as long as necessary, 1514 * but this wait can be interrupted. 1515 */ 1516 if ((sf = sf_buf_alloc(pg)) == NULL) { 1517 s = splvm(); 1518 vm_page_unwire(pg, 0); 1519 if (pg->wire_count == 0 && pg->object == NULL) 1520 vm_page_free(pg); 1521 splx(s); 1522 sbunlock(&so->so_snd); 1523 error = EINTR; 1524 goto done; 1525 } 1526 1527 /* 1528 * Get an mbuf header and set it up as having external storage. 1529 */ 1530 MGETHDR(m, M_WAIT, MT_DATA); 1531 if (m == NULL) { 1532 error = ENOBUFS; 1533 sf_buf_free((void *)sf->kva, PAGE_SIZE); 1534 sbunlock(&so->so_snd); 1535 goto done; 1536 } 1537 m->m_ext.ext_free = sf_buf_free; 1538 m->m_ext.ext_ref = sf_buf_ref; 1539 m->m_ext.ext_buf = (void *)sf->kva; 1540 m->m_ext.ext_size = PAGE_SIZE; 1541 m->m_data = (char *) sf->kva + pgoff; 1542 m->m_flags |= M_EXT; 1543 m->m_pkthdr.len = m->m_len = xfsize; 1544 /* 1545 * Add the buffer to the socket buffer chain. 1546 */ 1547 s = splnet(); 1548 retry_space: 1549 /* 1550 * Make sure that the socket is still able to take more data. 1551 * CANTSENDMORE being true usually means that the connection 1552 * was closed. so_error is true when an error was sensed after 1553 * a previous send. 1554 * The state is checked after the page mapping and buffer 1555 * allocation above since those operations may block and make 1556 * any socket checks stale. From this point forward, nothing 1557 * blocks before the pru_send (or more accurately, any blocking 1558 * results in a loop back to here to re-check). 1559 */ 1560 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1561 if (so->so_state & SS_CANTSENDMORE) { 1562 error = EPIPE; 1563 } else { 1564 error = so->so_error; 1565 so->so_error = 0; 1566 } 1567 m_freem(m); 1568 sbunlock(&so->so_snd); 1569 splx(s); 1570 goto done; 1571 } 1572 /* 1573 * Wait for socket space to become available. We do this just 1574 * after checking the connection state above in order to avoid 1575 * a race condition with sbwait(). 1576 */ 1577 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1578 if (so->so_state & SS_NBIO) { 1579 m_freem(m); 1580 sbunlock(&so->so_snd); 1581 splx(s); 1582 error = EAGAIN; 1583 goto done; 1584 } 1585 error = sbwait(&so->so_snd); 1586 /* 1587 * An error from sbwait usually indicates that we've 1588 * been interrupted by a signal. If we've sent anything 1589 * then return bytes sent, otherwise return the error. 1590 */ 1591 if (error) { 1592 m_freem(m); 1593 sbunlock(&so->so_snd); 1594 splx(s); 1595 goto done; 1596 } 1597 goto retry_space; 1598 } 1599 error = 1600 (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 1601 splx(s); 1602 if (error) { 1603 sbunlock(&so->so_snd); 1604 goto done; 1605 } 1606 } 1607 sbunlock(&so->so_snd); 1608 1609 done: 1610 if (fp) 1611 fdrop(fp, td); 1612 return (error); 1613 } 1614