1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.65.2.17 2003/04/04 17:11:16 tegge Exp $ 38 * $DragonFly: src/sys/kern/uipc_syscalls.c,v 1.26 2004/03/06 03:26:57 hsu Exp $ 39 */ 40 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/sysproto.h> 47 #include <sys/malloc.h> 48 #include <sys/filedesc.h> 49 #include <sys/event.h> 50 #include <sys/proc.h> 51 #include <sys/fcntl.h> 52 #include <sys/file.h> 53 #include <sys/filio.h> 54 #include <sys/kern_syscall.h> 55 #include <sys/mbuf.h> 56 #include <sys/protosw.h> 57 #include <sys/sfbuf.h> 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/socketops.h> 61 #include <sys/uio.h> 62 #include <sys/vnode.h> 63 #include <sys/lock.h> 64 #include <sys/mount.h> 65 #ifdef KTRACE 66 #include <sys/ktrace.h> 67 #endif 68 #include <vm/vm.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_page.h> 71 #include <vm/vm_pageout.h> 72 #include <vm/vm_kern.h> 73 #include <vm/vm_extern.h> 74 #include <sys/file2.h> 75 76 /* 77 * System call interface to the socket abstraction. 78 */ 79 80 extern struct fileops socketops; 81 82 /* 83 * socket_args(int domain, int type, int protocol) 84 */ 85 int 86 kern_socket(int domain, int type, int protocol, int *res) 87 { 88 struct thread *td = curthread; 89 struct proc *p = td->td_proc; 90 struct filedesc *fdp; 91 struct socket *so; 92 struct file *fp; 93 int fd, error; 94 95 KKASSERT(p); 96 fdp = p->p_fd; 97 98 error = falloc(p, &fp, &fd); 99 if (error) 100 return (error); 101 fhold(fp); 102 error = socreate(domain, &so, type, protocol, td); 103 if (error) { 104 if (fdp->fd_ofiles[fd] == fp) { 105 fdp->fd_ofiles[fd] = NULL; 106 fdrop(fp, td); 107 } 108 } else { 109 fp->f_data = (caddr_t)so; 110 fp->f_flag = FREAD|FWRITE; 111 fp->f_ops = &socketops; 112 fp->f_type = DTYPE_SOCKET; 113 *res = fd; 114 } 115 fdrop(fp, td); 116 return (error); 117 } 118 119 int 120 socket(struct socket_args *uap) 121 { 122 int error; 123 124 error = kern_socket(uap->domain, uap->type, uap->protocol, 125 &uap->sysmsg_result); 126 127 return (error); 128 } 129 130 int 131 kern_bind(int s, struct sockaddr *sa) 132 { 133 struct thread *td = curthread; 134 struct proc *p = td->td_proc; 135 struct file *fp; 136 int error; 137 138 KKASSERT(p); 139 error = holdsock(p->p_fd, s, &fp); 140 if (error) 141 return (error); 142 error = sobind((struct socket *)fp->f_data, sa, td); 143 fdrop(fp, td); 144 return (error); 145 } 146 147 /* 148 * bind_args(int s, caddr_t name, int namelen) 149 */ 150 int 151 bind(struct bind_args *uap) 152 { 153 struct sockaddr *sa; 154 int error; 155 156 error = getsockaddr(&sa, uap->name, uap->namelen); 157 if (error) 158 return (error); 159 error = kern_bind(uap->s, sa); 160 FREE(sa, M_SONAME); 161 162 return (error); 163 } 164 165 int 166 kern_listen(int s, int backlog) 167 { 168 struct thread *td = curthread; 169 struct proc *p = td->td_proc; 170 struct file *fp; 171 int error; 172 173 KKASSERT(p); 174 error = holdsock(p->p_fd, s, &fp); 175 if (error) 176 return (error); 177 error = solisten((struct socket *)fp->f_data, backlog, td); 178 fdrop(fp, td); 179 return(error); 180 } 181 182 /* 183 * listen_args(int s, int backlog) 184 */ 185 int 186 listen(struct listen_args *uap) 187 { 188 int error; 189 190 error = kern_listen(uap->s, uap->backlog); 191 return (error); 192 } 193 194 /* 195 * The second argument to kern_accept() is a handle to a struct sockaddr. 196 * This allows kern_accept() to return a pointer to an allocated struct 197 * sockaddr which must be freed later with FREE(). The caller must 198 * initialize *name to NULL. 199 */ 200 int 201 kern_accept(int s, struct sockaddr **name, int *namelen, int *res) 202 { 203 struct thread *td = curthread; 204 struct proc *p = td->td_proc; 205 struct filedesc *fdp = p->p_fd; 206 struct file *lfp = NULL; 207 struct file *nfp = NULL; 208 struct sockaddr *sa; 209 int error, s1; 210 struct socket *head, *so; 211 int fd; 212 u_int fflag; /* type must match fp->f_flag */ 213 int tmp; 214 215 if (name && namelen && *namelen < 0) 216 return (EINVAL); 217 218 error = holdsock(fdp, s, &lfp); 219 if (error) 220 return (error); 221 s1 = splnet(); 222 head = (struct socket *)lfp->f_data; 223 if ((head->so_options & SO_ACCEPTCONN) == 0) { 224 splx(s1); 225 error = EINVAL; 226 goto done; 227 } 228 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 229 if (head->so_state & SS_CANTRCVMORE) { 230 head->so_error = ECONNABORTED; 231 break; 232 } 233 if ((head->so_state & SS_NBIO) != 0) { 234 head->so_error = EWOULDBLOCK; 235 break; 236 } 237 error = tsleep((caddr_t)&head->so_timeo, PCATCH, "accept", 0); 238 if (error) { 239 splx(s1); 240 goto done; 241 } 242 } 243 if (head->so_error) { 244 error = head->so_error; 245 head->so_error = 0; 246 splx(s1); 247 goto done; 248 } 249 250 /* 251 * At this point we know that there is at least one connection 252 * ready to be accepted. Remove it from the queue prior to 253 * allocating the file descriptor for it since falloc() may 254 * block allowing another process to accept the connection 255 * instead. 256 */ 257 so = TAILQ_FIRST(&head->so_comp); 258 TAILQ_REMOVE(&head->so_comp, so, so_list); 259 head->so_qlen--; 260 261 fflag = lfp->f_flag; 262 error = falloc(p, &nfp, &fd); 263 if (error) { 264 /* 265 * Probably ran out of file descriptors. Put the 266 * unaccepted connection back onto the queue and 267 * do another wakeup so some other process might 268 * have a chance at it. 269 */ 270 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 271 head->so_qlen++; 272 wakeup_one(&head->so_timeo); 273 splx(s1); 274 goto done; 275 } 276 fhold(nfp); 277 *res = fd; 278 279 /* connection has been removed from the listen queue */ 280 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 281 282 so->so_state &= ~SS_COMP; 283 so->so_head = NULL; 284 if (head->so_sigio != NULL) 285 fsetown(fgetown(head->so_sigio), &so->so_sigio); 286 287 nfp->f_data = (caddr_t)so; 288 nfp->f_flag = fflag; 289 nfp->f_ops = &socketops; 290 nfp->f_type = DTYPE_SOCKET; 291 /* Sync socket nonblocking/async state with file flags */ 292 tmp = fflag & FNONBLOCK; 293 (void) fo_ioctl(nfp, FIONBIO, (caddr_t)&tmp, td); 294 tmp = fflag & FASYNC; 295 (void) fo_ioctl(nfp, FIOASYNC, (caddr_t)&tmp, td); 296 297 sa = NULL; 298 error = soaccept(so, &sa); 299 300 /* 301 * Set the returned name and namelen as applicable. Set the returned 302 * namelen to 0 for older code which might ignore the return value 303 * from accept. 304 */ 305 if (error == 0) { 306 if (sa && name && namelen) { 307 if (*namelen > sa->sa_len) 308 *namelen = sa->sa_len; 309 *name = sa; 310 } else { 311 if (sa) 312 FREE(sa, M_SONAME); 313 } 314 } 315 316 /* 317 * close the new descriptor, assuming someone hasn't ripped it 318 * out from under us. Note that *res is normally ignored if an 319 * error is returned but a syscall message will still have access 320 * to the result code. 321 */ 322 if (error) { 323 *res = -1; 324 if (fdp->fd_ofiles[fd] == nfp) { 325 fdp->fd_ofiles[fd] = NULL; 326 fdrop(nfp, td); 327 } 328 } 329 splx(s1); 330 331 /* 332 * Release explicitly held references before returning. 333 */ 334 done: 335 if (nfp != NULL) 336 fdrop(nfp, td); 337 fdrop(lfp, td); 338 return (error); 339 } 340 341 /* 342 * accept_args(int s, caddr_t name, int *anamelen) 343 */ 344 int 345 accept(struct accept_args *uap) 346 { 347 struct sockaddr *sa = NULL; 348 int sa_len; 349 int error; 350 351 if (uap->name) { 352 error = copyin(uap->anamelen, &sa_len, sizeof(sa_len)); 353 if (error) 354 return (error); 355 356 error = kern_accept(uap->s, &sa, &sa_len, &uap->sysmsg_result); 357 358 if (error == 0) 359 error = copyout(sa, uap->name, sa_len); 360 if (error == 0) { 361 error = copyout(&sa_len, uap->anamelen, 362 sizeof(*uap->anamelen)); 363 } 364 if (sa) 365 FREE(sa, M_SONAME); 366 } else { 367 error = kern_accept(uap->s, NULL, 0, &uap->sysmsg_result); 368 } 369 return (error); 370 } 371 372 int 373 kern_connect(int s, struct sockaddr *sa) 374 { 375 struct thread *td = curthread; 376 struct proc *p = td->td_proc; 377 struct file *fp; 378 struct socket *so; 379 int error; 380 381 error = holdsock(p->p_fd, s, &fp); 382 if (error) 383 return (error); 384 so = (struct socket *)fp->f_data; 385 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 386 error = EALREADY; 387 goto done; 388 } 389 error = soconnect(so, sa, td); 390 if (error) 391 goto bad; 392 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 393 error = EINPROGRESS; 394 goto done; 395 } 396 s = splnet(); 397 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 398 error = tsleep((caddr_t)&so->so_timeo, PCATCH, "connec", 0); 399 if (error) 400 break; 401 } 402 if (error == 0) { 403 error = so->so_error; 404 so->so_error = 0; 405 } 406 splx(s); 407 bad: 408 so->so_state &= ~SS_ISCONNECTING; 409 if (error == ERESTART) 410 error = EINTR; 411 done: 412 fdrop(fp, td); 413 return (error); 414 } 415 416 /* 417 * connect_args(int s, caddr_t name, int namelen) 418 */ 419 int 420 connect(struct connect_args *uap) 421 { 422 struct sockaddr *sa; 423 int error; 424 425 error = getsockaddr(&sa, uap->name, uap->namelen); 426 if (error) 427 return (error); 428 error = kern_connect(uap->s, sa); 429 FREE(sa, M_SONAME); 430 431 return (error); 432 } 433 434 int 435 kern_socketpair(int domain, int type, int protocol, int *sv) 436 { 437 struct thread *td = curthread; 438 struct proc *p = td->td_proc; 439 struct filedesc *fdp; 440 struct file *fp1, *fp2; 441 struct socket *so1, *so2; 442 int fd, error; 443 444 KKASSERT(p); 445 fdp = p->p_fd; 446 error = socreate(domain, &so1, type, protocol, td); 447 if (error) 448 return (error); 449 error = socreate(domain, &so2, type, protocol, td); 450 if (error) 451 goto free1; 452 error = falloc(p, &fp1, &fd); 453 if (error) 454 goto free2; 455 fhold(fp1); 456 sv[0] = fd; 457 fp1->f_data = (caddr_t)so1; 458 error = falloc(p, &fp2, &fd); 459 if (error) 460 goto free3; 461 fhold(fp2); 462 fp2->f_data = (caddr_t)so2; 463 sv[1] = fd; 464 error = soconnect2(so1, so2); 465 if (error) 466 goto free4; 467 if (type == SOCK_DGRAM) { 468 /* 469 * Datagram socket connection is asymmetric. 470 */ 471 error = soconnect2(so2, so1); 472 if (error) 473 goto free4; 474 } 475 fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 476 fp1->f_ops = fp2->f_ops = &socketops; 477 fp1->f_type = fp2->f_type = DTYPE_SOCKET; 478 fdrop(fp1, td); 479 fdrop(fp2, td); 480 return (error); 481 free4: 482 if (fdp->fd_ofiles[sv[1]] == fp2) { 483 fdp->fd_ofiles[sv[1]] = NULL; 484 fdrop(fp2, td); 485 } 486 fdrop(fp2, td); 487 free3: 488 if (fdp->fd_ofiles[sv[0]] == fp1) { 489 fdp->fd_ofiles[sv[0]] = NULL; 490 fdrop(fp1, td); 491 } 492 fdrop(fp1, td); 493 free2: 494 (void)soclose(so2); 495 free1: 496 (void)soclose(so1); 497 return (error); 498 } 499 500 /* 501 * socketpair(int domain, int type, int protocol, int *rsv) 502 */ 503 int 504 socketpair(struct socketpair_args *uap) 505 { 506 int error, sockv[2]; 507 508 error = kern_socketpair(uap->domain, uap->type, uap->protocol, sockv); 509 510 if (error == 0) 511 error = copyout(sockv, uap->rsv, sizeof(sockv)); 512 return (error); 513 } 514 515 int 516 kern_sendmsg(int s, struct sockaddr *sa, struct uio *auio, 517 struct mbuf *control, int flags, int *res) 518 { 519 struct thread *td = curthread; 520 struct proc *p = td->td_proc; 521 struct file *fp; 522 int len, error; 523 struct socket *so; 524 #ifdef KTRACE 525 struct iovec *ktriov = NULL; 526 struct uio ktruio; 527 #endif 528 529 error = holdsock(p->p_fd, s, &fp); 530 if (error) 531 return (error); 532 if (auio->uio_resid < 0) { 533 error = EINVAL; 534 goto done; 535 } 536 #ifdef KTRACE 537 if (KTRPOINT(td, KTR_GENIO)) { 538 int iovlen = auio->uio_iovcnt * sizeof (struct iovec); 539 540 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 541 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen); 542 ktruio = *auio; 543 } 544 #endif 545 len = auio->uio_resid; 546 so = (struct socket *)fp->f_data; 547 error = so_pru_sosend(so, sa, auio, NULL, control, flags, td); 548 if (error) { 549 if (auio->uio_resid != len && (error == ERESTART || 550 error == EINTR || error == EWOULDBLOCK)) 551 error = 0; 552 if (error == EPIPE) 553 psignal(p, SIGPIPE); 554 } 555 #ifdef KTRACE 556 if (ktriov != NULL) { 557 if (error == 0) { 558 ktruio.uio_iov = ktriov; 559 ktruio.uio_resid = len - auio->uio_resid; 560 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error); 561 } 562 FREE(ktriov, M_TEMP); 563 } 564 #endif 565 if (error == 0) 566 *res = len - auio->uio_resid; 567 done: 568 fdrop(fp, td); 569 return (error); 570 } 571 572 /* 573 * sendto_args(int s, caddr_t buf, size_t len, int flags, caddr_t to, int tolen) 574 */ 575 int 576 sendto(struct sendto_args *uap) 577 { 578 struct thread *td = curthread; 579 struct uio auio; 580 struct iovec aiov; 581 struct sockaddr *sa = NULL; 582 int error; 583 584 if (uap->to) { 585 error = getsockaddr(&sa, uap->to, uap->tolen); 586 if (error) 587 return (error); 588 } 589 aiov.iov_base = uap->buf; 590 aiov.iov_len = uap->len; 591 auio.uio_iov = &aiov; 592 auio.uio_iovcnt = 1; 593 auio.uio_offset = 0; 594 auio.uio_resid = uap->len; 595 auio.uio_segflg = UIO_USERSPACE; 596 auio.uio_rw = UIO_WRITE; 597 auio.uio_td = td; 598 599 error = kern_sendmsg(uap->s, sa, &auio, NULL, uap->flags, 600 &uap->sysmsg_result); 601 602 if (sa) 603 FREE(sa, M_SONAME); 604 return (error); 605 } 606 607 /* 608 * sendmsg_args(int s, caddr_t msg, int flags) 609 */ 610 int 611 sendmsg(struct sendmsg_args *uap) 612 { 613 struct thread *td = curthread; 614 struct msghdr msg; 615 struct uio auio; 616 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 617 struct sockaddr *sa = NULL; 618 struct mbuf *control = NULL; 619 int error; 620 621 error = copyin(uap->msg, (caddr_t)&msg, sizeof(msg)); 622 if (error) 623 return (error); 624 625 /* 626 * Conditionally copyin msg.msg_name. 627 */ 628 if (msg.msg_name) { 629 error = getsockaddr(&sa, msg.msg_name, msg.msg_namelen); 630 if (error) 631 return (error); 632 } 633 634 /* 635 * Populate auio. 636 */ 637 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen, 638 &auio.uio_resid); 639 if (error) 640 goto cleanup; 641 auio.uio_iov = iov; 642 auio.uio_iovcnt = msg.msg_iovlen; 643 auio.uio_offset = 0; 644 auio.uio_segflg = UIO_USERSPACE; 645 auio.uio_rw = UIO_WRITE; 646 auio.uio_td = td; 647 648 /* 649 * Conditionally copyin msg.msg_control. 650 */ 651 if (msg.msg_control) { 652 if (msg.msg_controllen < sizeof(struct cmsghdr) || 653 msg.msg_controllen > MLEN) { 654 error = EINVAL; 655 goto cleanup; 656 } 657 control = m_get(M_WAIT, MT_CONTROL); 658 if (control == NULL) { 659 error = ENOBUFS; 660 goto cleanup; 661 } 662 control->m_len = msg.msg_controllen; 663 error = copyin(msg.msg_control, mtod(control, caddr_t), 664 msg.msg_controllen); 665 if (error) { 666 m_free(control); 667 goto cleanup; 668 } 669 } 670 671 error = kern_sendmsg(uap->s, sa, &auio, control, uap->flags, 672 &uap->sysmsg_result); 673 674 cleanup: 675 if (sa) 676 FREE(sa, M_SONAME); 677 iovec_free(&iov, aiov); 678 return (error); 679 } 680 681 /* 682 * kern_recvmsg() takes a handle to sa and control. If the handle is non- 683 * null, it returns a dynamically allocated struct sockaddr and an mbuf. 684 * Don't forget to FREE() and m_free() these if they are returned. 685 */ 686 int 687 kern_recvmsg(int s, struct sockaddr **sa, struct uio *auio, 688 struct mbuf **control, int *flags, int *res) 689 { 690 struct thread *td = curthread; 691 struct proc *p = td->td_proc; 692 struct file *fp; 693 int len, error; 694 struct socket *so; 695 #ifdef KTRACE 696 struct iovec *ktriov = NULL; 697 struct uio ktruio; 698 #endif 699 700 error = holdsock(p->p_fd, s, &fp); 701 if (error) 702 return (error); 703 if (auio->uio_resid < 0) { 704 error = EINVAL; 705 goto done; 706 } 707 #ifdef KTRACE 708 if (KTRPOINT(td, KTR_GENIO)) { 709 int iovlen = auio->uio_iovcnt * sizeof (struct iovec); 710 711 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 712 bcopy(auio->uio_iov, ktriov, iovlen); 713 ktruio = *auio; 714 } 715 #endif 716 len = auio->uio_resid; 717 so = (struct socket *)fp->f_data; 718 error = so_pru_soreceive(so, sa, auio, NULL, control, flags); 719 if (error) { 720 if (auio->uio_resid != len && (error == ERESTART || 721 error == EINTR || error == EWOULDBLOCK)) 722 error = 0; 723 } 724 #ifdef KTRACE 725 if (ktriov != NULL) { 726 if (error == 0) { 727 ktruio.uio_iov = ktriov; 728 ktruio.uio_resid = len - auio->uio_resid; 729 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error); 730 } 731 FREE(ktriov, M_TEMP); 732 } 733 #endif 734 if (error == 0) 735 *res = len - auio->uio_resid; 736 done: 737 fdrop(fp, td); 738 return (error); 739 } 740 741 /* 742 * recvfrom_args(int s, caddr_t buf, size_t len, int flags, 743 * caddr_t from, int *fromlenaddr) 744 */ 745 int 746 recvfrom(struct recvfrom_args *uap) 747 { 748 struct thread *td = curthread; 749 struct uio auio; 750 struct iovec aiov; 751 struct sockaddr *sa = NULL; 752 int error, fromlen; 753 754 if (uap->from && uap->fromlenaddr) { 755 error = copyin(uap->fromlenaddr, &fromlen, sizeof(fromlen)); 756 if (error) 757 return (error); 758 if (fromlen < 0) 759 return (EINVAL); 760 } else { 761 fromlen = 0; 762 } 763 aiov.iov_base = uap->buf; 764 aiov.iov_len = uap->len; 765 auio.uio_iov = &aiov; 766 auio.uio_iovcnt = 1; 767 auio.uio_offset = 0; 768 auio.uio_resid = uap->len; 769 auio.uio_segflg = UIO_USERSPACE; 770 auio.uio_rw = UIO_READ; 771 auio.uio_td = td; 772 773 error = kern_recvmsg(uap->s, uap->from ? &sa : NULL, &auio, NULL, 774 &uap->flags, &uap->sysmsg_result); 775 776 if (error == 0 && uap->from) { 777 /* note: sa may still be NULL */ 778 if (sa) { 779 fromlen = MIN(fromlen, sa->sa_len); 780 error = copyout(sa, uap->from, fromlen); 781 } else { 782 fromlen = 0; 783 } 784 if (error == 0) { 785 error = copyout(&fromlen, uap->fromlenaddr, 786 sizeof(fromlen)); 787 } 788 } 789 if (sa) 790 FREE(sa, M_SONAME); 791 792 return (error); 793 } 794 795 /* 796 * recvmsg_args(int s, struct msghdr *msg, int flags) 797 */ 798 int 799 recvmsg(struct recvmsg_args *uap) 800 { 801 struct thread *td = curthread; 802 struct msghdr msg; 803 struct uio auio; 804 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 805 struct mbuf *m, *control = NULL; 806 struct sockaddr *sa = NULL; 807 caddr_t ctlbuf; 808 socklen_t *ufromlenp, *ucontrollenp; 809 int error, fromlen, controllen, len, flags, *uflagsp; 810 811 /* 812 * This copyin handles everything except the iovec. 813 */ 814 error = copyin(uap->msg, &msg, sizeof(msg)); 815 if (error) 816 return (error); 817 818 if (msg.msg_name && msg.msg_namelen < 0) 819 return (EINVAL); 820 if (msg.msg_control && msg.msg_controllen < 0) 821 return (EINVAL); 822 823 ufromlenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr, 824 msg_namelen)); 825 ucontrollenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr, 826 msg_controllen)); 827 uflagsp = (int *)((caddr_t)uap->msg + offsetof(struct msghdr, 828 msg_flags)); 829 830 /* 831 * Populate auio. 832 */ 833 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen, 834 &auio.uio_resid); 835 if (error) 836 return (error); 837 auio.uio_iov = iov; 838 auio.uio_iovcnt = msg.msg_iovlen; 839 auio.uio_offset = 0; 840 auio.uio_segflg = UIO_USERSPACE; 841 auio.uio_rw = UIO_READ; 842 auio.uio_td = td; 843 844 flags = msg.msg_flags; 845 846 error = kern_recvmsg(uap->s, msg.msg_name ? &sa : NULL, &auio, 847 msg.msg_control ? &control : NULL, &flags, &uap->sysmsg_result); 848 849 /* 850 * Conditionally copyout the name and populate the namelen field. 851 */ 852 if (error == 0 && msg.msg_name) { 853 fromlen = MIN(msg.msg_namelen, sa->sa_len); 854 error = copyout(sa, msg.msg_name, fromlen); 855 if (error == 0) 856 error = copyout(&fromlen, ufromlenp, 857 sizeof(*ufromlenp)); 858 } 859 860 /* 861 * Copyout msg.msg_control and msg.msg_controllen. 862 */ 863 if (error == 0 && msg.msg_control) { 864 len = msg.msg_controllen; 865 m = control; 866 ctlbuf = (caddr_t)msg.msg_control; 867 868 while(m && len > 0) { 869 unsigned int tocopy; 870 871 if (len >= m->m_len) { 872 tocopy = m->m_len; 873 } else { 874 msg.msg_flags |= MSG_CTRUNC; 875 tocopy = len; 876 } 877 878 error = copyout(mtod(m, caddr_t), ctlbuf, tocopy); 879 if (error) 880 goto cleanup; 881 882 ctlbuf += tocopy; 883 len -= tocopy; 884 m = m->m_next; 885 } 886 controllen = ctlbuf - (caddr_t)msg.msg_control; 887 error = copyout(&controllen, ucontrollenp, 888 sizeof(*ucontrollenp)); 889 } 890 891 if (error == 0) 892 error = copyout(&flags, uflagsp, sizeof(*uflagsp)); 893 894 cleanup: 895 if (sa) 896 FREE(sa, M_SONAME); 897 iovec_free(&iov, aiov); 898 if (control) 899 m_freem(control); 900 return (error); 901 } 902 903 /* 904 * shutdown_args(int s, int how) 905 */ 906 int 907 kern_shutdown(int s, int how) 908 { 909 struct thread *td = curthread; 910 struct proc *p = td->td_proc; 911 struct file *fp; 912 int error; 913 914 KKASSERT(p); 915 error = holdsock(p->p_fd, s, &fp); 916 if (error) 917 return (error); 918 error = soshutdown((struct socket *)fp->f_data, how); 919 fdrop(fp, td); 920 return(error); 921 } 922 923 int 924 shutdown(struct shutdown_args *uap) 925 { 926 int error; 927 928 error = kern_shutdown(uap->s, uap->how); 929 930 return (error); 931 } 932 933 /* 934 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an 935 * in kernel pointer instead of a userland pointer. This allows us 936 * to manipulate socket options in the emulation code. 937 */ 938 int 939 kern_setsockopt(int s, struct sockopt *sopt) 940 { 941 struct thread *td = curthread; 942 struct proc *p = td->td_proc; 943 struct file *fp; 944 int error; 945 946 if (sopt->sopt_val == 0 && sopt->sopt_valsize != 0) 947 return (EFAULT); 948 if (sopt->sopt_valsize < 0) 949 return (EINVAL); 950 951 error = holdsock(p->p_fd, s, &fp); 952 if (error) 953 return (error); 954 955 error = sosetopt((struct socket *)fp->f_data, sopt); 956 fdrop(fp, td); 957 return (error); 958 } 959 960 /* 961 * setsockopt_args(int s, int level, int name, caddr_t val, int valsize) 962 */ 963 int 964 setsockopt(struct setsockopt_args *uap) 965 { 966 struct thread *td = curthread; 967 struct sockopt sopt; 968 int error; 969 970 sopt.sopt_dir = SOPT_SET; 971 sopt.sopt_level = uap->level; 972 sopt.sopt_name = uap->name; 973 sopt.sopt_val = uap->val; 974 sopt.sopt_valsize = uap->valsize; 975 sopt.sopt_td = td; 976 977 error = kern_setsockopt(uap->s, &sopt); 978 return(error); 979 } 980 981 /* 982 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an 983 * in kernel pointer instead of a userland pointer. This allows us 984 * to manipulate socket options in the emulation code. 985 */ 986 int 987 kern_getsockopt(int s, struct sockopt *sopt) 988 { 989 struct thread *td = curthread; 990 struct proc *p = td->td_proc; 991 struct file *fp; 992 int error; 993 994 if (sopt->sopt_val == 0 && sopt->sopt_valsize != 0) 995 return (EFAULT); 996 if (sopt->sopt_valsize < 0) 997 return (EINVAL); 998 999 error = holdsock(p->p_fd, s, &fp); 1000 if (error) 1001 return (error); 1002 1003 error = sogetopt((struct socket *)fp->f_data, sopt); 1004 fdrop(fp, td); 1005 return (error); 1006 } 1007 1008 /* 1009 * getsockopt_Args(int s, int level, int name, caddr_t val, int *avalsize) 1010 */ 1011 int 1012 getsockopt(struct getsockopt_args *uap) 1013 { 1014 struct thread *td = curthread; 1015 struct sockopt sopt; 1016 int error, valsize; 1017 1018 if (uap->val) { 1019 error = copyin(uap->avalsize, &valsize, sizeof(valsize)); 1020 if (error) 1021 return (error); 1022 if (valsize < 0) 1023 return (EINVAL); 1024 } else { 1025 valsize = 0; 1026 } 1027 1028 sopt.sopt_dir = SOPT_GET; 1029 sopt.sopt_level = uap->level; 1030 sopt.sopt_name = uap->name; 1031 sopt.sopt_val = uap->val; 1032 sopt.sopt_valsize = valsize; 1033 sopt.sopt_td = td; 1034 1035 error = kern_getsockopt(uap->s, &sopt); 1036 if (error == 0) { 1037 valsize = sopt.sopt_valsize; 1038 error = copyout(&valsize, uap->avalsize, sizeof(valsize)); 1039 } 1040 return (error); 1041 } 1042 1043 /* 1044 * The second argument to kern_getsockname() is a handle to a struct sockaddr. 1045 * This allows kern_getsockname() to return a pointer to an allocated struct 1046 * sockaddr which must be freed later with FREE(). The caller must 1047 * initialize *name to NULL. 1048 */ 1049 int 1050 kern_getsockname(int s, struct sockaddr **name, int *namelen) 1051 { 1052 struct thread *td = curthread; 1053 struct proc *p = td->td_proc; 1054 struct file *fp; 1055 struct socket *so; 1056 struct sockaddr *sa = NULL; 1057 int error; 1058 1059 error = holdsock(p->p_fd, s, &fp); 1060 if (error) 1061 return (error); 1062 if (*namelen < 0) { 1063 fdrop(fp, td); 1064 return (EINVAL); 1065 } 1066 so = (struct socket *)fp->f_data; 1067 error = so_pru_sockaddr(so, &sa); 1068 if (error == 0) { 1069 if (sa == 0) { 1070 *namelen = 0; 1071 } else { 1072 *namelen = MIN(*namelen, sa->sa_len); 1073 *name = sa; 1074 } 1075 } 1076 1077 fdrop(fp, td); 1078 return (error); 1079 } 1080 1081 /* 1082 * getsockname_args(int fdes, caddr_t asa, int *alen) 1083 * 1084 * Get socket name. 1085 */ 1086 int 1087 getsockname(struct getsockname_args *uap) 1088 { 1089 struct sockaddr *sa = NULL; 1090 int error, sa_len; 1091 1092 error = copyin(uap->alen, &sa_len, sizeof(sa_len)); 1093 if (error) 1094 return (error); 1095 1096 error = kern_getsockname(uap->fdes, &sa, &sa_len); 1097 1098 if (error == 0) 1099 error = copyout(sa, uap->asa, sa_len); 1100 if (error == 0) 1101 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen)); 1102 if (sa) 1103 FREE(sa, M_SONAME); 1104 return (error); 1105 } 1106 1107 /* 1108 * The second argument to kern_getpeername() is a handle to a struct sockaddr. 1109 * This allows kern_getpeername() to return a pointer to an allocated struct 1110 * sockaddr which must be freed later with FREE(). The caller must 1111 * initialize *name to NULL. 1112 */ 1113 int 1114 kern_getpeername(int s, struct sockaddr **name, int *namelen) 1115 { 1116 struct thread *td = curthread; 1117 struct proc *p = td->td_proc; 1118 struct file *fp; 1119 struct socket *so; 1120 struct sockaddr *sa = NULL; 1121 int error; 1122 1123 error = holdsock(p->p_fd, s, &fp); 1124 if (error) 1125 return (error); 1126 if (*namelen < 0) { 1127 fdrop(fp, td); 1128 return (EINVAL); 1129 } 1130 so = (struct socket *)fp->f_data; 1131 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1132 fdrop(fp, td); 1133 return (ENOTCONN); 1134 } 1135 error = so_pru_peeraddr(so, &sa); 1136 if (error == 0) { 1137 if (sa == 0) { 1138 *namelen = 0; 1139 } else { 1140 *namelen = MIN(*namelen, sa->sa_len); 1141 *name = sa; 1142 } 1143 } 1144 1145 fdrop(fp, td); 1146 return (error); 1147 } 1148 1149 /* 1150 * getpeername_args(int fdes, caddr_t asa, int *alen) 1151 * 1152 * Get name of peer for connected socket. 1153 */ 1154 int 1155 getpeername(struct getpeername_args *uap) 1156 { 1157 struct sockaddr *sa = NULL; 1158 int error, sa_len; 1159 1160 error = copyin(uap->alen, &sa_len, sizeof(sa_len)); 1161 if (error) 1162 return (error); 1163 1164 error = kern_getpeername(uap->fdes, &sa, &sa_len); 1165 1166 if (error == 0) 1167 error = copyout(sa, uap->asa, sa_len); 1168 if (error == 0) 1169 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen)); 1170 if (sa) 1171 FREE(sa, M_SONAME); 1172 return (error); 1173 } 1174 1175 int 1176 getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len) 1177 { 1178 struct sockaddr *sa; 1179 int error; 1180 1181 *namp = NULL; 1182 if (len > SOCK_MAXADDRLEN) 1183 return ENAMETOOLONG; 1184 if (len < offsetof(struct sockaddr, sa_data[0])) 1185 return EDOM; 1186 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1187 error = copyin(uaddr, sa, len); 1188 if (error) { 1189 FREE(sa, M_SONAME); 1190 } else { 1191 #if BYTE_ORDER != BIG_ENDIAN 1192 /* 1193 * The bind(), connect(), and sendto() syscalls were not 1194 * versioned for COMPAT_43. Thus, this check must stay. 1195 */ 1196 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1197 sa->sa_family = sa->sa_len; 1198 #endif 1199 sa->sa_len = len; 1200 *namp = sa; 1201 } 1202 return error; 1203 } 1204 1205 /* 1206 * holdsock() - load the struct file pointer associated 1207 * with a socket into *fpp. If an error occurs, non-zero 1208 * will be returned and *fpp will be set to NULL. 1209 */ 1210 int 1211 holdsock(fdp, fdes, fpp) 1212 struct filedesc *fdp; 1213 int fdes; 1214 struct file **fpp; 1215 { 1216 struct file *fp = NULL; 1217 int error = 0; 1218 1219 if ((unsigned)fdes >= fdp->fd_nfiles || 1220 (fp = fdp->fd_ofiles[fdes]) == NULL) { 1221 error = EBADF; 1222 } else if (fp->f_type != DTYPE_SOCKET) { 1223 error = ENOTSOCK; 1224 fp = NULL; 1225 } else { 1226 fhold(fp); 1227 } 1228 *fpp = fp; 1229 return(error); 1230 } 1231 1232 /* 1233 * sendfile(2). 1234 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1235 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1236 * 1237 * Send a file specified by 'fd' and starting at 'offset' to a socket 1238 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1239 * nbytes == 0. Optionally add a header and/or trailer to the socket 1240 * output. If specified, write the total number of bytes sent into *sbytes. 1241 * 1242 * In FreeBSD kern/uipc_syscalls.c,v 1.103, a bug was fixed that caused 1243 * the headers to count against the remaining bytes to be sent from 1244 * the file descriptor. We may wish to implement a compatibility syscall 1245 * in the future. 1246 */ 1247 int 1248 sendfile(struct sendfile_args *uap) 1249 { 1250 struct thread *td = curthread; 1251 struct proc *p = td->td_proc; 1252 struct file *fp; 1253 struct filedesc *fdp; 1254 struct vnode *vp = NULL; 1255 struct sf_hdtr hdtr; 1256 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 1257 struct uio auio; 1258 off_t hdtr_size = 0, sbytes; 1259 int error, res; 1260 1261 KKASSERT(p); 1262 fdp = p->p_fd; 1263 1264 /* 1265 * Do argument checking. Must be a regular file in, stream 1266 * type and connected socket out, positive offset. 1267 */ 1268 fp = holdfp(fdp, uap->fd, FREAD); 1269 if (fp == NULL) { 1270 return (EBADF); 1271 } 1272 if (fp->f_type != DTYPE_VNODE) { 1273 fdrop(fp, td); 1274 return (EINVAL); 1275 } 1276 vp = (struct vnode *)fp->f_data; 1277 vref(vp); 1278 fdrop(fp, td); 1279 1280 /* 1281 * If specified, get the pointer to the sf_hdtr struct for 1282 * any headers/trailers. 1283 */ 1284 if (uap->hdtr) { 1285 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1286 if (error) 1287 goto done; 1288 /* 1289 * Send any headers. 1290 */ 1291 if (hdtr.headers) { 1292 error = iovec_copyin(hdtr.headers, &iov, aiov, 1293 hdtr.hdr_cnt, &auio.uio_resid); 1294 if (error) 1295 goto done; 1296 auio.uio_iov = iov; 1297 auio.uio_iovcnt = hdtr.hdr_cnt; 1298 auio.uio_offset = 0; 1299 auio.uio_segflg = UIO_USERSPACE; 1300 auio.uio_rw = UIO_WRITE; 1301 auio.uio_td = td; 1302 1303 error = kern_sendmsg(uap->s, NULL, &auio, NULL, 0, 1304 &res); 1305 1306 iovec_free(&iov, aiov); 1307 if (error) 1308 goto done; 1309 hdtr_size += res; 1310 } 1311 } 1312 1313 error = kern_sendfile(vp, uap->s, uap->offset, uap->nbytes, 1314 &sbytes, uap->flags); 1315 if (error) 1316 goto done; 1317 1318 /* 1319 * Send trailers. Wimp out and use writev(2). 1320 */ 1321 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1322 error = iovec_copyin(hdtr.trailers, &iov, aiov, 1323 hdtr.trl_cnt, &auio.uio_resid); 1324 if (error) 1325 goto done; 1326 auio.uio_iov = iov; 1327 auio.uio_iovcnt = hdtr.trl_cnt; 1328 auio.uio_offset = 0; 1329 auio.uio_segflg = UIO_USERSPACE; 1330 auio.uio_rw = UIO_WRITE; 1331 auio.uio_td = td; 1332 1333 error = kern_sendmsg(uap->s, NULL, &auio, NULL, 0, &res); 1334 1335 iovec_free(&iov, aiov); 1336 if (error) 1337 goto done; 1338 hdtr_size += res; 1339 } 1340 1341 done: 1342 if (uap->sbytes != NULL) { 1343 sbytes += hdtr_size; 1344 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1345 } 1346 if (vp) 1347 vrele(vp); 1348 return (error); 1349 } 1350 1351 int 1352 kern_sendfile(struct vnode *vp, int s, off_t offset, size_t nbytes, 1353 off_t *sbytes, int flags) 1354 { 1355 struct thread *td = curthread; 1356 struct proc *p = td->td_proc; 1357 struct vm_object *obj; 1358 struct socket *so; 1359 struct file *fp; 1360 struct mbuf *m; 1361 struct sf_buf *sf; 1362 struct vm_page *pg; 1363 off_t off, xfsize; 1364 int error = 0; 1365 1366 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1367 error = EINVAL; 1368 goto done; 1369 } 1370 error = holdsock(p->p_fd, s, &fp); 1371 if (error) 1372 goto done; 1373 so = (struct socket *)fp->f_data; 1374 if (so->so_type != SOCK_STREAM) { 1375 error = EINVAL; 1376 goto done; 1377 } 1378 if ((so->so_state & SS_ISCONNECTED) == 0) { 1379 error = ENOTCONN; 1380 goto done; 1381 } 1382 if (offset < 0) { 1383 error = EINVAL; 1384 goto done; 1385 } 1386 1387 *sbytes = 0; 1388 /* 1389 * Protect against multiple writers to the socket. 1390 */ 1391 (void) sblock(&so->so_snd, M_WAITOK); 1392 1393 /* 1394 * Loop through the pages in the file, starting with the requested 1395 * offset. Get a file page (do I/O if necessary), map the file page 1396 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1397 * it on the socket. 1398 */ 1399 for (off = offset; ; off += xfsize, *sbytes += xfsize) { 1400 vm_pindex_t pindex; 1401 vm_offset_t pgoff; 1402 1403 pindex = OFF_TO_IDX(off); 1404 retry_lookup: 1405 /* 1406 * Calculate the amount to transfer. Not to exceed a page, 1407 * the EOF, or the passed in nbytes. 1408 */ 1409 xfsize = obj->un_pager.vnp.vnp_size - off; 1410 if (xfsize > PAGE_SIZE) 1411 xfsize = PAGE_SIZE; 1412 pgoff = (vm_offset_t)(off & PAGE_MASK); 1413 if (PAGE_SIZE - pgoff < xfsize) 1414 xfsize = PAGE_SIZE - pgoff; 1415 if (nbytes && xfsize > (nbytes - *sbytes)) 1416 xfsize = nbytes - *sbytes; 1417 if (xfsize <= 0) 1418 break; 1419 /* 1420 * Optimize the non-blocking case by looking at the socket space 1421 * before going to the extra work of constituting the sf_buf. 1422 */ 1423 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1424 if (so->so_state & SS_CANTSENDMORE) 1425 error = EPIPE; 1426 else 1427 error = EAGAIN; 1428 sbunlock(&so->so_snd); 1429 goto done; 1430 } 1431 /* 1432 * Attempt to look up the page. 1433 * 1434 * Allocate if not found 1435 * 1436 * Wait and loop if busy. 1437 */ 1438 pg = vm_page_lookup(obj, pindex); 1439 1440 if (pg == NULL) { 1441 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1442 if (pg == NULL) { 1443 VM_WAIT; 1444 goto retry_lookup; 1445 } 1446 vm_page_wakeup(pg); 1447 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1448 goto retry_lookup; 1449 } 1450 1451 /* 1452 * Wire the page so it does not get ripped out from under 1453 * us. 1454 */ 1455 1456 vm_page_wire(pg); 1457 1458 /* 1459 * If page is not valid for what we need, initiate I/O 1460 */ 1461 1462 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1463 struct uio auio; 1464 struct iovec aiov; 1465 int bsize; 1466 1467 /* 1468 * Ensure that our page is still around when the I/O 1469 * completes. 1470 */ 1471 vm_page_io_start(pg); 1472 1473 /* 1474 * Get the page from backing store. 1475 */ 1476 bsize = vp->v_mount->mnt_stat.f_iosize; 1477 auio.uio_iov = &aiov; 1478 auio.uio_iovcnt = 1; 1479 aiov.iov_base = 0; 1480 aiov.iov_len = MAXBSIZE; 1481 auio.uio_resid = MAXBSIZE; 1482 auio.uio_offset = trunc_page(off); 1483 auio.uio_segflg = UIO_NOCOPY; 1484 auio.uio_rw = UIO_READ; 1485 auio.uio_td = td; 1486 vn_lock(vp, NULL, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1487 error = VOP_READ(vp, &auio, 1488 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1489 p->p_ucred); 1490 VOP_UNLOCK(vp, NULL, 0, td); 1491 vm_page_flag_clear(pg, PG_ZERO); 1492 vm_page_io_finish(pg); 1493 if (error) { 1494 vm_page_unwire(pg, 0); 1495 /* 1496 * See if anyone else might know about this page. 1497 * If not and it is not valid, then free it. 1498 */ 1499 if (pg->wire_count == 0 && pg->valid == 0 && 1500 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1501 pg->hold_count == 0) { 1502 vm_page_busy(pg); 1503 vm_page_free(pg); 1504 } 1505 sbunlock(&so->so_snd); 1506 goto done; 1507 } 1508 } 1509 1510 1511 /* 1512 * Get a sendfile buf. We usually wait as long as necessary, 1513 * but this wait can be interrupted. 1514 */ 1515 if ((sf = sf_buf_alloc(pg)) == NULL) { 1516 s = splvm(); 1517 vm_page_unwire(pg, 0); 1518 if (pg->wire_count == 0 && pg->object == NULL) 1519 vm_page_free(pg); 1520 splx(s); 1521 sbunlock(&so->so_snd); 1522 error = EINTR; 1523 goto done; 1524 } 1525 1526 /* 1527 * Get an mbuf header and set it up as having external storage. 1528 */ 1529 MGETHDR(m, M_WAIT, MT_DATA); 1530 if (m == NULL) { 1531 error = ENOBUFS; 1532 sf_buf_free((void *)sf->kva, PAGE_SIZE); 1533 sbunlock(&so->so_snd); 1534 goto done; 1535 } 1536 m->m_ext.ext_free = sf_buf_free; 1537 m->m_ext.ext_ref = sf_buf_ref; 1538 m->m_ext.ext_buf = (void *)sf->kva; 1539 m->m_ext.ext_size = PAGE_SIZE; 1540 m->m_data = (char *) sf->kva + pgoff; 1541 m->m_flags |= M_EXT; 1542 m->m_pkthdr.len = m->m_len = xfsize; 1543 /* 1544 * Add the buffer to the socket buffer chain. 1545 */ 1546 s = splnet(); 1547 retry_space: 1548 /* 1549 * Make sure that the socket is still able to take more data. 1550 * CANTSENDMORE being true usually means that the connection 1551 * was closed. so_error is true when an error was sensed after 1552 * a previous send. 1553 * The state is checked after the page mapping and buffer 1554 * allocation above since those operations may block and make 1555 * any socket checks stale. From this point forward, nothing 1556 * blocks before the pru_send (or more accurately, any blocking 1557 * results in a loop back to here to re-check). 1558 */ 1559 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1560 if (so->so_state & SS_CANTSENDMORE) { 1561 error = EPIPE; 1562 } else { 1563 error = so->so_error; 1564 so->so_error = 0; 1565 } 1566 m_freem(m); 1567 sbunlock(&so->so_snd); 1568 splx(s); 1569 goto done; 1570 } 1571 /* 1572 * Wait for socket space to become available. We do this just 1573 * after checking the connection state above in order to avoid 1574 * a race condition with sbwait(). 1575 */ 1576 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1577 if (so->so_state & SS_NBIO) { 1578 m_freem(m); 1579 sbunlock(&so->so_snd); 1580 splx(s); 1581 error = EAGAIN; 1582 goto done; 1583 } 1584 error = sbwait(&so->so_snd); 1585 /* 1586 * An error from sbwait usually indicates that we've 1587 * been interrupted by a signal. If we've sent anything 1588 * then return bytes sent, otherwise return the error. 1589 */ 1590 if (error) { 1591 m_freem(m); 1592 sbunlock(&so->so_snd); 1593 splx(s); 1594 goto done; 1595 } 1596 goto retry_space; 1597 } 1598 error = so_pru_send(so, 0, m, NULL, NULL, td); 1599 splx(s); 1600 if (error) { 1601 sbunlock(&so->so_snd); 1602 goto done; 1603 } 1604 } 1605 sbunlock(&so->so_snd); 1606 1607 done: 1608 if (fp) 1609 fdrop(fp, td); 1610 return (error); 1611 } 1612