1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.65.2.17 2003/04/04 17:11:16 tegge Exp $ 38 * $DragonFly: src/sys/kern/uipc_syscalls.c,v 1.45 2004/11/20 20:35:33 dillon Exp $ 39 */ 40 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/sysproto.h> 47 #include <sys/malloc.h> 48 #include <sys/filedesc.h> 49 #include <sys/event.h> 50 #include <sys/proc.h> 51 #include <sys/fcntl.h> 52 #include <sys/file.h> 53 #include <sys/filio.h> 54 #include <sys/kern_syscall.h> 55 #include <sys/mbuf.h> 56 #include <sys/protosw.h> 57 #include <sys/sfbuf.h> 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/socketops.h> 61 #include <sys/uio.h> 62 #include <sys/vnode.h> 63 #include <sys/lock.h> 64 #include <sys/mount.h> 65 #ifdef KTRACE 66 #include <sys/ktrace.h> 67 #endif 68 #include <vm/vm.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_page.h> 71 #include <vm/vm_pageout.h> 72 #include <vm/vm_kern.h> 73 #include <vm/vm_extern.h> 74 #include <sys/file2.h> 75 #include <sys/signalvar.h> 76 77 #include <sys/thread2.h> 78 #include <sys/msgport2.h> 79 80 struct sfbuf_mref { 81 struct sf_buf *sf; 82 int mref_count; 83 }; 84 85 static MALLOC_DEFINE(M_SENDFILE, "sendfile", "sendfile sfbuf ref structures"); 86 87 /* 88 * System call interface to the socket abstraction. 89 */ 90 91 extern struct fileops socketops; 92 93 /* 94 * socket_args(int domain, int type, int protocol) 95 */ 96 int 97 kern_socket(int domain, int type, int protocol, int *res) 98 { 99 struct thread *td = curthread; 100 struct proc *p = td->td_proc; 101 struct filedesc *fdp; 102 struct socket *so; 103 struct file *fp; 104 int fd, error; 105 106 KKASSERT(p); 107 fdp = p->p_fd; 108 109 error = falloc(p, &fp, &fd); 110 if (error) 111 return (error); 112 error = socreate(domain, &so, type, protocol, td); 113 if (error) { 114 if (fdp->fd_ofiles[fd] == fp) { 115 fdp->fd_ofiles[fd] = NULL; 116 fdrop(fp, td); 117 } 118 } else { 119 fp->f_data = (caddr_t)so; 120 fp->f_flag = FREAD|FWRITE; 121 fp->f_ops = &socketops; 122 fp->f_type = DTYPE_SOCKET; 123 *res = fd; 124 } 125 fdrop(fp, td); 126 return (error); 127 } 128 129 int 130 socket(struct socket_args *uap) 131 { 132 int error; 133 134 error = kern_socket(uap->domain, uap->type, uap->protocol, 135 &uap->sysmsg_result); 136 137 return (error); 138 } 139 140 int 141 kern_bind(int s, struct sockaddr *sa) 142 { 143 struct thread *td = curthread; 144 struct proc *p = td->td_proc; 145 struct file *fp; 146 int error; 147 148 KKASSERT(p); 149 error = holdsock(p->p_fd, s, &fp); 150 if (error) 151 return (error); 152 error = sobind((struct socket *)fp->f_data, sa, td); 153 fdrop(fp, td); 154 return (error); 155 } 156 157 /* 158 * bind_args(int s, caddr_t name, int namelen) 159 */ 160 int 161 bind(struct bind_args *uap) 162 { 163 struct sockaddr *sa; 164 int error; 165 166 error = getsockaddr(&sa, uap->name, uap->namelen); 167 if (error) 168 return (error); 169 error = kern_bind(uap->s, sa); 170 FREE(sa, M_SONAME); 171 172 return (error); 173 } 174 175 int 176 kern_listen(int s, int backlog) 177 { 178 struct thread *td = curthread; 179 struct proc *p = td->td_proc; 180 struct file *fp; 181 int error; 182 183 KKASSERT(p); 184 error = holdsock(p->p_fd, s, &fp); 185 if (error) 186 return (error); 187 error = solisten((struct socket *)fp->f_data, backlog, td); 188 fdrop(fp, td); 189 return(error); 190 } 191 192 /* 193 * listen_args(int s, int backlog) 194 */ 195 int 196 listen(struct listen_args *uap) 197 { 198 int error; 199 200 error = kern_listen(uap->s, uap->backlog); 201 return (error); 202 } 203 204 /* 205 * Returns the accepted socket as well. 206 */ 207 static boolean_t 208 soaccept_predicate(struct netmsg *msg0) 209 { 210 struct netmsg_so_notify *msg = (struct netmsg_so_notify *)msg0; 211 struct socket *head = msg->nm_so; 212 213 if (head->so_error != 0) { 214 msg->nm_lmsg.ms_error = head->so_error; 215 return (TRUE); 216 } 217 if (!TAILQ_EMPTY(&head->so_comp)) { 218 /* Abuse nm_so field as copy in/copy out parameter. XXX JH */ 219 msg->nm_so = TAILQ_FIRST(&head->so_comp); 220 TAILQ_REMOVE(&head->so_comp, msg->nm_so, so_list); 221 head->so_qlen--; 222 223 msg->nm_lmsg.ms_error = 0; 224 return (TRUE); 225 } 226 if (head->so_state & SS_CANTRCVMORE) { 227 msg->nm_lmsg.ms_error = ECONNABORTED; 228 return (TRUE); 229 } 230 if (head->so_state & SS_NBIO) { 231 msg->nm_lmsg.ms_error = EWOULDBLOCK; 232 return (TRUE); 233 } 234 235 return (FALSE); 236 } 237 238 /* 239 * The second argument to kern_accept() is a handle to a struct sockaddr. 240 * This allows kern_accept() to return a pointer to an allocated struct 241 * sockaddr which must be freed later with FREE(). The caller must 242 * initialize *name to NULL. 243 */ 244 int 245 kern_accept(int s, struct sockaddr **name, int *namelen, int *res) 246 { 247 struct thread *td = curthread; 248 struct proc *p = td->td_proc; 249 struct filedesc *fdp = p->p_fd; 250 struct file *lfp = NULL; 251 struct file *nfp = NULL; 252 struct sockaddr *sa; 253 struct socket *head, *so; 254 struct netmsg_so_notify msg; 255 lwkt_port_t port; 256 int fd; 257 u_int fflag; /* type must match fp->f_flag */ 258 int error, tmp; 259 260 if (name && namelen && *namelen < 0) 261 return (EINVAL); 262 263 error = holdsock(fdp, s, &lfp); 264 if (error) 265 return (error); 266 267 error = falloc(p, &nfp, &fd); 268 if (error) { /* Probably ran out of file descriptors. */ 269 *res = -1; 270 fdrop(lfp, td); 271 return (error); 272 } 273 *res = fd; 274 275 head = (struct socket *)lfp->f_data; 276 if ((head->so_options & SO_ACCEPTCONN) == 0) { 277 error = EINVAL; 278 goto done; 279 } 280 281 /* optimize for uniprocessor case later XXX JH */ 282 port = head->so_proto->pr_mport(head, NULL, PRU_PRED); 283 lwkt_initmsg(&msg.nm_lmsg, &curthread->td_msgport, 284 MSGF_PCATCH | MSGF_ABORTABLE, 285 lwkt_cmd_func(netmsg_so_notify), 286 lwkt_cmd_func(netmsg_so_notify_abort)); 287 msg.nm_predicate = soaccept_predicate; 288 msg.nm_so = head; 289 msg.nm_etype = NM_REVENT; 290 error = lwkt_domsg(port, &msg.nm_lmsg); 291 if (error) 292 goto done; 293 294 /* 295 * At this point we have the connection that's ready to be accepted. 296 */ 297 so = msg.nm_so; 298 299 fflag = lfp->f_flag; 300 301 /* connection has been removed from the listen queue */ 302 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 303 304 so->so_state &= ~SS_COMP; 305 so->so_head = NULL; 306 if (head->so_sigio != NULL) 307 fsetown(fgetown(head->so_sigio), &so->so_sigio); 308 309 nfp->f_data = (caddr_t)so; 310 nfp->f_flag = fflag; 311 nfp->f_ops = &socketops; 312 nfp->f_type = DTYPE_SOCKET; 313 /* Sync socket nonblocking/async state with file flags */ 314 tmp = fflag & FNONBLOCK; 315 (void) fo_ioctl(nfp, FIONBIO, (caddr_t)&tmp, td); 316 tmp = fflag & FASYNC; 317 (void) fo_ioctl(nfp, FIOASYNC, (caddr_t)&tmp, td); 318 319 sa = NULL; 320 error = soaccept(so, &sa); 321 322 /* 323 * Set the returned name and namelen as applicable. Set the returned 324 * namelen to 0 for older code which might ignore the return value 325 * from accept. 326 */ 327 if (error == 0) { 328 if (sa && name && namelen) { 329 if (*namelen > sa->sa_len) 330 *namelen = sa->sa_len; 331 *name = sa; 332 } else { 333 if (sa) 334 FREE(sa, M_SONAME); 335 } 336 } 337 338 done: 339 /* 340 * close the new descriptor, assuming someone hasn't ripped it 341 * out from under us. Note that *res is normally ignored if an 342 * error is returned but a syscall message will still have access 343 * to the result code. 344 */ 345 if (error) { 346 *res = -1; 347 if (fdp->fd_ofiles[fd] == nfp) { 348 fdp->fd_ofiles[fd] = NULL; 349 fdrop(nfp, td); 350 } 351 } 352 353 /* 354 * Release explicitly held references before returning. 355 */ 356 if (nfp) 357 fdrop(nfp, td); 358 fdrop(lfp, td); 359 return (error); 360 } 361 362 /* 363 * accept_args(int s, caddr_t name, int *anamelen) 364 */ 365 int 366 accept(struct accept_args *uap) 367 { 368 struct sockaddr *sa = NULL; 369 int sa_len; 370 int error; 371 372 if (uap->name) { 373 error = copyin(uap->anamelen, &sa_len, sizeof(sa_len)); 374 if (error) 375 return (error); 376 377 error = kern_accept(uap->s, &sa, &sa_len, &uap->sysmsg_result); 378 379 if (error == 0) 380 error = copyout(sa, uap->name, sa_len); 381 if (error == 0) { 382 error = copyout(&sa_len, uap->anamelen, 383 sizeof(*uap->anamelen)); 384 } 385 if (sa) 386 FREE(sa, M_SONAME); 387 } else { 388 error = kern_accept(uap->s, NULL, 0, &uap->sysmsg_result); 389 } 390 return (error); 391 } 392 393 /* 394 * Returns TRUE if predicate satisfied. 395 */ 396 static boolean_t 397 soconnected_predicate(struct netmsg *msg0) 398 { 399 struct netmsg_so_notify *msg = (struct netmsg_so_notify *)msg0; 400 struct socket *so = msg->nm_so; 401 402 /* check predicate */ 403 if (!(so->so_state & SS_ISCONNECTING) || so->so_error != 0) { 404 msg->nm_lmsg.ms_error = so->so_error; 405 return (TRUE); 406 } 407 408 return (FALSE); 409 } 410 411 int 412 kern_connect(int s, struct sockaddr *sa) 413 { 414 struct thread *td = curthread; 415 struct proc *p = td->td_proc; 416 struct file *fp; 417 struct socket *so; 418 int error; 419 420 error = holdsock(p->p_fd, s, &fp); 421 if (error) 422 return (error); 423 so = (struct socket *)fp->f_data; 424 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 425 error = EALREADY; 426 goto done; 427 } 428 error = soconnect(so, sa, td); 429 if (error) 430 goto bad; 431 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 432 error = EINPROGRESS; 433 goto done; 434 } 435 if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 436 struct netmsg_so_notify msg; 437 lwkt_port_t port; 438 439 port = so->so_proto->pr_mport(so, sa, PRU_PRED); 440 lwkt_initmsg(&msg.nm_lmsg, 441 &curthread->td_msgport, 442 MSGF_PCATCH | MSGF_ABORTABLE, 443 lwkt_cmd_func(netmsg_so_notify), 444 lwkt_cmd_func(netmsg_so_notify_abort)); 445 msg.nm_predicate = soconnected_predicate; 446 msg.nm_so = so; 447 msg.nm_etype = NM_REVENT; 448 error = lwkt_domsg(port, &msg.nm_lmsg); 449 } 450 if (error == 0) { 451 error = so->so_error; 452 so->so_error = 0; 453 } 454 bad: 455 so->so_state &= ~SS_ISCONNECTING; 456 if (error == ERESTART) 457 error = EINTR; 458 done: 459 fdrop(fp, td); 460 return (error); 461 } 462 463 /* 464 * connect_args(int s, caddr_t name, int namelen) 465 */ 466 int 467 connect(struct connect_args *uap) 468 { 469 struct sockaddr *sa; 470 int error; 471 472 error = getsockaddr(&sa, uap->name, uap->namelen); 473 if (error) 474 return (error); 475 error = kern_connect(uap->s, sa); 476 FREE(sa, M_SONAME); 477 478 return (error); 479 } 480 481 int 482 kern_socketpair(int domain, int type, int protocol, int *sv) 483 { 484 struct thread *td = curthread; 485 struct proc *p = td->td_proc; 486 struct filedesc *fdp; 487 struct file *fp1, *fp2; 488 struct socket *so1, *so2; 489 int fd, error; 490 491 KKASSERT(p); 492 fdp = p->p_fd; 493 error = socreate(domain, &so1, type, protocol, td); 494 if (error) 495 return (error); 496 error = socreate(domain, &so2, type, protocol, td); 497 if (error) 498 goto free1; 499 error = falloc(p, &fp1, &fd); 500 if (error) 501 goto free2; 502 sv[0] = fd; 503 fp1->f_data = (caddr_t)so1; 504 error = falloc(p, &fp2, &fd); 505 if (error) 506 goto free3; 507 fp2->f_data = (caddr_t)so2; 508 sv[1] = fd; 509 error = soconnect2(so1, so2); 510 if (error) 511 goto free4; 512 if (type == SOCK_DGRAM) { 513 /* 514 * Datagram socket connection is asymmetric. 515 */ 516 error = soconnect2(so2, so1); 517 if (error) 518 goto free4; 519 } 520 fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 521 fp1->f_ops = fp2->f_ops = &socketops; 522 fp1->f_type = fp2->f_type = DTYPE_SOCKET; 523 fdrop(fp1, td); 524 fdrop(fp2, td); 525 return (error); 526 free4: 527 if (fdp->fd_ofiles[sv[1]] == fp2) { 528 fdp->fd_ofiles[sv[1]] = NULL; 529 fdrop(fp2, td); 530 } 531 fdrop(fp2, td); 532 free3: 533 if (fdp->fd_ofiles[sv[0]] == fp1) { 534 fdp->fd_ofiles[sv[0]] = NULL; 535 fdrop(fp1, td); 536 } 537 fdrop(fp1, td); 538 free2: 539 (void)soclose(so2); 540 free1: 541 (void)soclose(so1); 542 return (error); 543 } 544 545 /* 546 * socketpair(int domain, int type, int protocol, int *rsv) 547 */ 548 int 549 socketpair(struct socketpair_args *uap) 550 { 551 int error, sockv[2]; 552 553 error = kern_socketpair(uap->domain, uap->type, uap->protocol, sockv); 554 555 if (error == 0) 556 error = copyout(sockv, uap->rsv, sizeof(sockv)); 557 return (error); 558 } 559 560 int 561 kern_sendmsg(int s, struct sockaddr *sa, struct uio *auio, 562 struct mbuf *control, int flags, int *res) 563 { 564 struct thread *td = curthread; 565 struct proc *p = td->td_proc; 566 struct file *fp; 567 int len, error; 568 struct socket *so; 569 #ifdef KTRACE 570 struct iovec *ktriov = NULL; 571 struct uio ktruio; 572 #endif 573 574 error = holdsock(p->p_fd, s, &fp); 575 if (error) 576 return (error); 577 if (auio->uio_resid < 0) { 578 error = EINVAL; 579 goto done; 580 } 581 #ifdef KTRACE 582 if (KTRPOINT(td, KTR_GENIO)) { 583 int iovlen = auio->uio_iovcnt * sizeof (struct iovec); 584 585 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 586 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen); 587 ktruio = *auio; 588 } 589 #endif 590 len = auio->uio_resid; 591 so = (struct socket *)fp->f_data; 592 error = so_pru_sosend(so, sa, auio, NULL, control, flags, td); 593 if (error) { 594 if (auio->uio_resid != len && (error == ERESTART || 595 error == EINTR || error == EWOULDBLOCK)) 596 error = 0; 597 if (error == EPIPE) 598 psignal(p, SIGPIPE); 599 } 600 #ifdef KTRACE 601 if (ktriov != NULL) { 602 if (error == 0) { 603 ktruio.uio_iov = ktriov; 604 ktruio.uio_resid = len - auio->uio_resid; 605 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error); 606 } 607 FREE(ktriov, M_TEMP); 608 } 609 #endif 610 if (error == 0) 611 *res = len - auio->uio_resid; 612 done: 613 fdrop(fp, td); 614 return (error); 615 } 616 617 /* 618 * sendto_args(int s, caddr_t buf, size_t len, int flags, caddr_t to, int tolen) 619 */ 620 int 621 sendto(struct sendto_args *uap) 622 { 623 struct thread *td = curthread; 624 struct uio auio; 625 struct iovec aiov; 626 struct sockaddr *sa = NULL; 627 int error; 628 629 if (uap->to) { 630 error = getsockaddr(&sa, uap->to, uap->tolen); 631 if (error) 632 return (error); 633 } 634 aiov.iov_base = uap->buf; 635 aiov.iov_len = uap->len; 636 auio.uio_iov = &aiov; 637 auio.uio_iovcnt = 1; 638 auio.uio_offset = 0; 639 auio.uio_resid = uap->len; 640 auio.uio_segflg = UIO_USERSPACE; 641 auio.uio_rw = UIO_WRITE; 642 auio.uio_td = td; 643 644 error = kern_sendmsg(uap->s, sa, &auio, NULL, uap->flags, 645 &uap->sysmsg_result); 646 647 if (sa) 648 FREE(sa, M_SONAME); 649 return (error); 650 } 651 652 /* 653 * sendmsg_args(int s, caddr_t msg, int flags) 654 */ 655 int 656 sendmsg(struct sendmsg_args *uap) 657 { 658 struct thread *td = curthread; 659 struct msghdr msg; 660 struct uio auio; 661 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 662 struct sockaddr *sa = NULL; 663 struct mbuf *control = NULL; 664 int error; 665 666 error = copyin(uap->msg, (caddr_t)&msg, sizeof(msg)); 667 if (error) 668 return (error); 669 670 /* 671 * Conditionally copyin msg.msg_name. 672 */ 673 if (msg.msg_name) { 674 error = getsockaddr(&sa, msg.msg_name, msg.msg_namelen); 675 if (error) 676 return (error); 677 } 678 679 /* 680 * Populate auio. 681 */ 682 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen, 683 &auio.uio_resid); 684 if (error) 685 goto cleanup; 686 auio.uio_iov = iov; 687 auio.uio_iovcnt = msg.msg_iovlen; 688 auio.uio_offset = 0; 689 auio.uio_segflg = UIO_USERSPACE; 690 auio.uio_rw = UIO_WRITE; 691 auio.uio_td = td; 692 693 /* 694 * Conditionally copyin msg.msg_control. 695 */ 696 if (msg.msg_control) { 697 if (msg.msg_controllen < sizeof(struct cmsghdr) || 698 msg.msg_controllen > MLEN) { 699 error = EINVAL; 700 goto cleanup; 701 } 702 control = m_get(MB_WAIT, MT_CONTROL); 703 if (control == NULL) { 704 error = ENOBUFS; 705 goto cleanup; 706 } 707 control->m_len = msg.msg_controllen; 708 error = copyin(msg.msg_control, mtod(control, caddr_t), 709 msg.msg_controllen); 710 if (error) { 711 m_free(control); 712 goto cleanup; 713 } 714 } 715 716 error = kern_sendmsg(uap->s, sa, &auio, control, uap->flags, 717 &uap->sysmsg_result); 718 719 cleanup: 720 if (sa) 721 FREE(sa, M_SONAME); 722 iovec_free(&iov, aiov); 723 return (error); 724 } 725 726 /* 727 * kern_recvmsg() takes a handle to sa and control. If the handle is non- 728 * null, it returns a dynamically allocated struct sockaddr and an mbuf. 729 * Don't forget to FREE() and m_free() these if they are returned. 730 */ 731 int 732 kern_recvmsg(int s, struct sockaddr **sa, struct uio *auio, 733 struct mbuf **control, int *flags, int *res) 734 { 735 struct thread *td = curthread; 736 struct proc *p = td->td_proc; 737 struct file *fp; 738 int len, error; 739 struct socket *so; 740 #ifdef KTRACE 741 struct iovec *ktriov = NULL; 742 struct uio ktruio; 743 #endif 744 745 error = holdsock(p->p_fd, s, &fp); 746 if (error) 747 return (error); 748 if (auio->uio_resid < 0) { 749 error = EINVAL; 750 goto done; 751 } 752 #ifdef KTRACE 753 if (KTRPOINT(td, KTR_GENIO)) { 754 int iovlen = auio->uio_iovcnt * sizeof (struct iovec); 755 756 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 757 bcopy(auio->uio_iov, ktriov, iovlen); 758 ktruio = *auio; 759 } 760 #endif 761 len = auio->uio_resid; 762 so = (struct socket *)fp->f_data; 763 error = so_pru_soreceive(so, sa, auio, NULL, control, flags); 764 if (error) { 765 if (auio->uio_resid != len && (error == ERESTART || 766 error == EINTR || error == EWOULDBLOCK)) 767 error = 0; 768 } 769 #ifdef KTRACE 770 if (ktriov != NULL) { 771 if (error == 0) { 772 ktruio.uio_iov = ktriov; 773 ktruio.uio_resid = len - auio->uio_resid; 774 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error); 775 } 776 FREE(ktriov, M_TEMP); 777 } 778 #endif 779 if (error == 0) 780 *res = len - auio->uio_resid; 781 done: 782 fdrop(fp, td); 783 return (error); 784 } 785 786 /* 787 * recvfrom_args(int s, caddr_t buf, size_t len, int flags, 788 * caddr_t from, int *fromlenaddr) 789 */ 790 int 791 recvfrom(struct recvfrom_args *uap) 792 { 793 struct thread *td = curthread; 794 struct uio auio; 795 struct iovec aiov; 796 struct sockaddr *sa = NULL; 797 int error, fromlen; 798 799 if (uap->from && uap->fromlenaddr) { 800 error = copyin(uap->fromlenaddr, &fromlen, sizeof(fromlen)); 801 if (error) 802 return (error); 803 if (fromlen < 0) 804 return (EINVAL); 805 } else { 806 fromlen = 0; 807 } 808 aiov.iov_base = uap->buf; 809 aiov.iov_len = uap->len; 810 auio.uio_iov = &aiov; 811 auio.uio_iovcnt = 1; 812 auio.uio_offset = 0; 813 auio.uio_resid = uap->len; 814 auio.uio_segflg = UIO_USERSPACE; 815 auio.uio_rw = UIO_READ; 816 auio.uio_td = td; 817 818 error = kern_recvmsg(uap->s, uap->from ? &sa : NULL, &auio, NULL, 819 &uap->flags, &uap->sysmsg_result); 820 821 if (error == 0 && uap->from) { 822 /* note: sa may still be NULL */ 823 if (sa) { 824 fromlen = MIN(fromlen, sa->sa_len); 825 error = copyout(sa, uap->from, fromlen); 826 } else { 827 fromlen = 0; 828 } 829 if (error == 0) { 830 error = copyout(&fromlen, uap->fromlenaddr, 831 sizeof(fromlen)); 832 } 833 } 834 if (sa) 835 FREE(sa, M_SONAME); 836 837 return (error); 838 } 839 840 /* 841 * recvmsg_args(int s, struct msghdr *msg, int flags) 842 */ 843 int 844 recvmsg(struct recvmsg_args *uap) 845 { 846 struct thread *td = curthread; 847 struct msghdr msg; 848 struct uio auio; 849 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 850 struct mbuf *m, *control = NULL; 851 struct sockaddr *sa = NULL; 852 caddr_t ctlbuf; 853 socklen_t *ufromlenp, *ucontrollenp; 854 int error, fromlen, controllen, len, flags, *uflagsp; 855 856 /* 857 * This copyin handles everything except the iovec. 858 */ 859 error = copyin(uap->msg, &msg, sizeof(msg)); 860 if (error) 861 return (error); 862 863 if (msg.msg_name && msg.msg_namelen < 0) 864 return (EINVAL); 865 if (msg.msg_control && msg.msg_controllen < 0) 866 return (EINVAL); 867 868 ufromlenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr, 869 msg_namelen)); 870 ucontrollenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr, 871 msg_controllen)); 872 uflagsp = (int *)((caddr_t)uap->msg + offsetof(struct msghdr, 873 msg_flags)); 874 875 /* 876 * Populate auio. 877 */ 878 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen, 879 &auio.uio_resid); 880 if (error) 881 return (error); 882 auio.uio_iov = iov; 883 auio.uio_iovcnt = msg.msg_iovlen; 884 auio.uio_offset = 0; 885 auio.uio_segflg = UIO_USERSPACE; 886 auio.uio_rw = UIO_READ; 887 auio.uio_td = td; 888 889 flags = uap->flags; 890 891 error = kern_recvmsg(uap->s, msg.msg_name ? &sa : NULL, &auio, 892 msg.msg_control ? &control : NULL, &flags, &uap->sysmsg_result); 893 894 /* 895 * Conditionally copyout the name and populate the namelen field. 896 */ 897 if (error == 0 && msg.msg_name) { 898 fromlen = MIN(msg.msg_namelen, sa->sa_len); 899 error = copyout(sa, msg.msg_name, fromlen); 900 if (error == 0) 901 error = copyout(&fromlen, ufromlenp, 902 sizeof(*ufromlenp)); 903 } 904 905 /* 906 * Copyout msg.msg_control and msg.msg_controllen. 907 */ 908 if (error == 0 && msg.msg_control) { 909 len = msg.msg_controllen; 910 m = control; 911 ctlbuf = (caddr_t)msg.msg_control; 912 913 while(m && len > 0) { 914 unsigned int tocopy; 915 916 if (len >= m->m_len) { 917 tocopy = m->m_len; 918 } else { 919 msg.msg_flags |= MSG_CTRUNC; 920 tocopy = len; 921 } 922 923 error = copyout(mtod(m, caddr_t), ctlbuf, tocopy); 924 if (error) 925 goto cleanup; 926 927 ctlbuf += tocopy; 928 len -= tocopy; 929 m = m->m_next; 930 } 931 controllen = ctlbuf - (caddr_t)msg.msg_control; 932 error = copyout(&controllen, ucontrollenp, 933 sizeof(*ucontrollenp)); 934 } 935 936 if (error == 0) 937 error = copyout(&flags, uflagsp, sizeof(*uflagsp)); 938 939 cleanup: 940 if (sa) 941 FREE(sa, M_SONAME); 942 iovec_free(&iov, aiov); 943 if (control) 944 m_freem(control); 945 return (error); 946 } 947 948 /* 949 * shutdown_args(int s, int how) 950 */ 951 int 952 kern_shutdown(int s, int how) 953 { 954 struct thread *td = curthread; 955 struct proc *p = td->td_proc; 956 struct file *fp; 957 int error; 958 959 KKASSERT(p); 960 error = holdsock(p->p_fd, s, &fp); 961 if (error) 962 return (error); 963 error = soshutdown((struct socket *)fp->f_data, how); 964 fdrop(fp, td); 965 return(error); 966 } 967 968 int 969 shutdown(struct shutdown_args *uap) 970 { 971 int error; 972 973 error = kern_shutdown(uap->s, uap->how); 974 975 return (error); 976 } 977 978 /* 979 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an 980 * in kernel pointer instead of a userland pointer. This allows us 981 * to manipulate socket options in the emulation code. 982 */ 983 int 984 kern_setsockopt(int s, struct sockopt *sopt) 985 { 986 struct thread *td = curthread; 987 struct proc *p = td->td_proc; 988 struct file *fp; 989 int error; 990 991 if (sopt->sopt_val == 0 && sopt->sopt_valsize != 0) 992 return (EFAULT); 993 if (sopt->sopt_valsize < 0) 994 return (EINVAL); 995 996 error = holdsock(p->p_fd, s, &fp); 997 if (error) 998 return (error); 999 1000 error = sosetopt((struct socket *)fp->f_data, sopt); 1001 fdrop(fp, td); 1002 return (error); 1003 } 1004 1005 /* 1006 * setsockopt_args(int s, int level, int name, caddr_t val, int valsize) 1007 */ 1008 int 1009 setsockopt(struct setsockopt_args *uap) 1010 { 1011 struct thread *td = curthread; 1012 struct sockopt sopt; 1013 int error; 1014 1015 sopt.sopt_dir = SOPT_SET; 1016 sopt.sopt_level = uap->level; 1017 sopt.sopt_name = uap->name; 1018 sopt.sopt_val = uap->val; 1019 sopt.sopt_valsize = uap->valsize; 1020 sopt.sopt_td = td; 1021 1022 error = kern_setsockopt(uap->s, &sopt); 1023 return(error); 1024 } 1025 1026 /* 1027 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an 1028 * in kernel pointer instead of a userland pointer. This allows us 1029 * to manipulate socket options in the emulation code. 1030 */ 1031 int 1032 kern_getsockopt(int s, struct sockopt *sopt) 1033 { 1034 struct thread *td = curthread; 1035 struct proc *p = td->td_proc; 1036 struct file *fp; 1037 int error; 1038 1039 if (sopt->sopt_val == 0 && sopt->sopt_valsize != 0) 1040 return (EFAULT); 1041 if (sopt->sopt_valsize < 0) 1042 return (EINVAL); 1043 1044 error = holdsock(p->p_fd, s, &fp); 1045 if (error) 1046 return (error); 1047 1048 error = sogetopt((struct socket *)fp->f_data, sopt); 1049 fdrop(fp, td); 1050 return (error); 1051 } 1052 1053 /* 1054 * getsockopt_Args(int s, int level, int name, caddr_t val, int *avalsize) 1055 */ 1056 int 1057 getsockopt(struct getsockopt_args *uap) 1058 { 1059 struct thread *td = curthread; 1060 struct sockopt sopt; 1061 int error, valsize; 1062 1063 if (uap->val) { 1064 error = copyin(uap->avalsize, &valsize, sizeof(valsize)); 1065 if (error) 1066 return (error); 1067 if (valsize < 0) 1068 return (EINVAL); 1069 } else { 1070 valsize = 0; 1071 } 1072 1073 sopt.sopt_dir = SOPT_GET; 1074 sopt.sopt_level = uap->level; 1075 sopt.sopt_name = uap->name; 1076 sopt.sopt_val = uap->val; 1077 sopt.sopt_valsize = valsize; 1078 sopt.sopt_td = td; 1079 1080 error = kern_getsockopt(uap->s, &sopt); 1081 if (error == 0) { 1082 valsize = sopt.sopt_valsize; 1083 error = copyout(&valsize, uap->avalsize, sizeof(valsize)); 1084 } 1085 return (error); 1086 } 1087 1088 /* 1089 * The second argument to kern_getsockname() is a handle to a struct sockaddr. 1090 * This allows kern_getsockname() to return a pointer to an allocated struct 1091 * sockaddr which must be freed later with FREE(). The caller must 1092 * initialize *name to NULL. 1093 */ 1094 int 1095 kern_getsockname(int s, struct sockaddr **name, int *namelen) 1096 { 1097 struct thread *td = curthread; 1098 struct proc *p = td->td_proc; 1099 struct file *fp; 1100 struct socket *so; 1101 struct sockaddr *sa = NULL; 1102 int error; 1103 1104 error = holdsock(p->p_fd, s, &fp); 1105 if (error) 1106 return (error); 1107 if (*namelen < 0) { 1108 fdrop(fp, td); 1109 return (EINVAL); 1110 } 1111 so = (struct socket *)fp->f_data; 1112 error = so_pru_sockaddr(so, &sa); 1113 if (error == 0) { 1114 if (sa == 0) { 1115 *namelen = 0; 1116 } else { 1117 *namelen = MIN(*namelen, sa->sa_len); 1118 *name = sa; 1119 } 1120 } 1121 1122 fdrop(fp, td); 1123 return (error); 1124 } 1125 1126 /* 1127 * getsockname_args(int fdes, caddr_t asa, int *alen) 1128 * 1129 * Get socket name. 1130 */ 1131 int 1132 getsockname(struct getsockname_args *uap) 1133 { 1134 struct sockaddr *sa = NULL; 1135 int error, sa_len; 1136 1137 error = copyin(uap->alen, &sa_len, sizeof(sa_len)); 1138 if (error) 1139 return (error); 1140 1141 error = kern_getsockname(uap->fdes, &sa, &sa_len); 1142 1143 if (error == 0) 1144 error = copyout(sa, uap->asa, sa_len); 1145 if (error == 0) 1146 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen)); 1147 if (sa) 1148 FREE(sa, M_SONAME); 1149 return (error); 1150 } 1151 1152 /* 1153 * The second argument to kern_getpeername() is a handle to a struct sockaddr. 1154 * This allows kern_getpeername() to return a pointer to an allocated struct 1155 * sockaddr which must be freed later with FREE(). The caller must 1156 * initialize *name to NULL. 1157 */ 1158 int 1159 kern_getpeername(int s, struct sockaddr **name, int *namelen) 1160 { 1161 struct thread *td = curthread; 1162 struct proc *p = td->td_proc; 1163 struct file *fp; 1164 struct socket *so; 1165 struct sockaddr *sa = NULL; 1166 int error; 1167 1168 error = holdsock(p->p_fd, s, &fp); 1169 if (error) 1170 return (error); 1171 if (*namelen < 0) { 1172 fdrop(fp, td); 1173 return (EINVAL); 1174 } 1175 so = (struct socket *)fp->f_data; 1176 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1177 fdrop(fp, td); 1178 return (ENOTCONN); 1179 } 1180 error = so_pru_peeraddr(so, &sa); 1181 if (error == 0) { 1182 if (sa == 0) { 1183 *namelen = 0; 1184 } else { 1185 *namelen = MIN(*namelen, sa->sa_len); 1186 *name = sa; 1187 } 1188 } 1189 1190 fdrop(fp, td); 1191 return (error); 1192 } 1193 1194 /* 1195 * getpeername_args(int fdes, caddr_t asa, int *alen) 1196 * 1197 * Get name of peer for connected socket. 1198 */ 1199 int 1200 getpeername(struct getpeername_args *uap) 1201 { 1202 struct sockaddr *sa = NULL; 1203 int error, sa_len; 1204 1205 error = copyin(uap->alen, &sa_len, sizeof(sa_len)); 1206 if (error) 1207 return (error); 1208 1209 error = kern_getpeername(uap->fdes, &sa, &sa_len); 1210 1211 if (error == 0) 1212 error = copyout(sa, uap->asa, sa_len); 1213 if (error == 0) 1214 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen)); 1215 if (sa) 1216 FREE(sa, M_SONAME); 1217 return (error); 1218 } 1219 1220 int 1221 getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len) 1222 { 1223 struct sockaddr *sa; 1224 int error; 1225 1226 *namp = NULL; 1227 if (len > SOCK_MAXADDRLEN) 1228 return ENAMETOOLONG; 1229 if (len < offsetof(struct sockaddr, sa_data[0])) 1230 return EDOM; 1231 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1232 error = copyin(uaddr, sa, len); 1233 if (error) { 1234 FREE(sa, M_SONAME); 1235 } else { 1236 #if BYTE_ORDER != BIG_ENDIAN 1237 /* 1238 * The bind(), connect(), and sendto() syscalls were not 1239 * versioned for COMPAT_43. Thus, this check must stay. 1240 */ 1241 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1242 sa->sa_family = sa->sa_len; 1243 #endif 1244 sa->sa_len = len; 1245 *namp = sa; 1246 } 1247 return error; 1248 } 1249 1250 /* 1251 * holdsock() - load the struct file pointer associated 1252 * with a socket into *fpp. If an error occurs, non-zero 1253 * will be returned and *fpp will be set to NULL. 1254 */ 1255 int 1256 holdsock(fdp, fdes, fpp) 1257 struct filedesc *fdp; 1258 int fdes; 1259 struct file **fpp; 1260 { 1261 struct file *fp = NULL; 1262 int error = 0; 1263 1264 if ((unsigned)fdes >= fdp->fd_nfiles || 1265 (fp = fdp->fd_ofiles[fdes]) == NULL) { 1266 error = EBADF; 1267 } else if (fp->f_type != DTYPE_SOCKET) { 1268 error = ENOTSOCK; 1269 fp = NULL; 1270 } else { 1271 fhold(fp); 1272 } 1273 *fpp = fp; 1274 return(error); 1275 } 1276 1277 /* 1278 * Detach a mapped page and release resources back to the system. 1279 * We must release our wiring and if the object is ripped out 1280 * from under the vm_page we become responsible for freeing the 1281 * page. 1282 * 1283 * XXX HACK XXX TEMPORARY UNTIL WE IMPLEMENT EXT MBUF REFERENCE COUNTING 1284 */ 1285 static void 1286 sf_buf_mref(void *arg) 1287 { 1288 struct sfbuf_mref *sfm = arg; 1289 1290 ++sfm->mref_count; 1291 } 1292 1293 static void 1294 sf_buf_mfree(void *arg) 1295 { 1296 struct sfbuf_mref *sfm = arg; 1297 vm_page_t m; 1298 1299 KKASSERT(sfm->mref_count > 0); 1300 if (--sfm->mref_count == 0) { 1301 m = sf_buf_page(sfm->sf); 1302 sf_buf_free(sfm->sf); 1303 crit_enter(); 1304 vm_page_unwire(m, 0); 1305 if (m->wire_count == 0 && m->object == NULL) 1306 vm_page_try_to_free(m); 1307 crit_exit(); 1308 free(sfm, M_SENDFILE); 1309 } 1310 } 1311 1312 /* 1313 * sendfile(2). 1314 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1315 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1316 * 1317 * Send a file specified by 'fd' and starting at 'offset' to a socket 1318 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1319 * nbytes == 0. Optionally add a header and/or trailer to the socket 1320 * output. If specified, write the total number of bytes sent into *sbytes. 1321 * 1322 * In FreeBSD kern/uipc_syscalls.c,v 1.103, a bug was fixed that caused 1323 * the headers to count against the remaining bytes to be sent from 1324 * the file descriptor. We may wish to implement a compatibility syscall 1325 * in the future. 1326 */ 1327 int 1328 sendfile(struct sendfile_args *uap) 1329 { 1330 struct thread *td = curthread; 1331 struct proc *p = td->td_proc; 1332 struct file *fp; 1333 struct filedesc *fdp; 1334 struct vnode *vp = NULL; 1335 struct sf_hdtr hdtr; 1336 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 1337 struct uio auio; 1338 struct mbuf *mheader = NULL; 1339 off_t hdtr_size = 0, sbytes; 1340 int error, hbytes = 0, tbytes; 1341 1342 KKASSERT(p); 1343 fdp = p->p_fd; 1344 1345 /* 1346 * Do argument checking. Must be a regular file in, stream 1347 * type and connected socket out, positive offset. 1348 */ 1349 fp = holdfp(fdp, uap->fd, FREAD); 1350 if (fp == NULL) { 1351 return (EBADF); 1352 } 1353 if (fp->f_type != DTYPE_VNODE) { 1354 fdrop(fp, td); 1355 return (EINVAL); 1356 } 1357 vp = (struct vnode *)fp->f_data; 1358 vref(vp); 1359 fdrop(fp, td); 1360 1361 /* 1362 * If specified, get the pointer to the sf_hdtr struct for 1363 * any headers/trailers. 1364 */ 1365 if (uap->hdtr) { 1366 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1367 if (error) 1368 goto done; 1369 /* 1370 * Send any headers. 1371 */ 1372 if (hdtr.headers) { 1373 error = iovec_copyin(hdtr.headers, &iov, aiov, 1374 hdtr.hdr_cnt, &hbytes); 1375 if (error) 1376 goto done; 1377 auio.uio_iov = iov; 1378 auio.uio_iovcnt = hdtr.hdr_cnt; 1379 auio.uio_offset = 0; 1380 auio.uio_segflg = UIO_USERSPACE; 1381 auio.uio_rw = UIO_WRITE; 1382 auio.uio_td = td; 1383 auio.uio_resid = hbytes; 1384 1385 mheader = m_uiomove(&auio, MB_WAIT, 0); 1386 1387 iovec_free(&iov, aiov); 1388 if (mheader == NULL) 1389 goto done; 1390 } 1391 } 1392 1393 error = kern_sendfile(vp, uap->s, uap->offset, uap->nbytes, mheader, 1394 &sbytes, uap->flags); 1395 if (error) 1396 goto done; 1397 1398 /* 1399 * Send trailers. Wimp out and use writev(2). 1400 */ 1401 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1402 error = iovec_copyin(hdtr.trailers, &iov, aiov, 1403 hdtr.trl_cnt, &auio.uio_resid); 1404 if (error) 1405 goto done; 1406 auio.uio_iov = iov; 1407 auio.uio_iovcnt = hdtr.trl_cnt; 1408 auio.uio_offset = 0; 1409 auio.uio_segflg = UIO_USERSPACE; 1410 auio.uio_rw = UIO_WRITE; 1411 auio.uio_td = td; 1412 1413 error = kern_sendmsg(uap->s, NULL, &auio, NULL, 0, &tbytes); 1414 1415 iovec_free(&iov, aiov); 1416 if (error) 1417 goto done; 1418 hdtr_size += tbytes; /* trailer bytes successfully sent */ 1419 } 1420 1421 done: 1422 if (uap->sbytes != NULL) { 1423 sbytes += hdtr_size; 1424 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1425 } 1426 if (vp) 1427 vrele(vp); 1428 return (error); 1429 } 1430 1431 int 1432 kern_sendfile(struct vnode *vp, int sfd, off_t offset, size_t nbytes, 1433 struct mbuf *mheader, off_t *sbytes, int flags) 1434 { 1435 struct thread *td = curthread; 1436 struct proc *p = td->td_proc; 1437 struct vm_object *obj; 1438 struct socket *so; 1439 struct file *fp; 1440 struct mbuf *m; 1441 struct sf_buf *sf; 1442 struct sfbuf_mref *sfm; 1443 struct vm_page *pg; 1444 off_t off, xfsize; 1445 off_t hbytes = 0; 1446 int error = 0; 1447 1448 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1449 error = EINVAL; 1450 goto done; 1451 } 1452 error = holdsock(p->p_fd, sfd, &fp); 1453 if (error) 1454 goto done; 1455 so = (struct socket *)fp->f_data; 1456 if (so->so_type != SOCK_STREAM) { 1457 error = EINVAL; 1458 goto done; 1459 } 1460 if ((so->so_state & SS_ISCONNECTED) == 0) { 1461 error = ENOTCONN; 1462 goto done; 1463 } 1464 if (offset < 0) { 1465 error = EINVAL; 1466 goto done; 1467 } 1468 1469 *sbytes = 0; 1470 /* 1471 * Protect against multiple writers to the socket. 1472 */ 1473 (void) sblock(&so->so_snd, M_WAITOK); 1474 1475 /* 1476 * Loop through the pages in the file, starting with the requested 1477 * offset. Get a file page (do I/O if necessary), map the file page 1478 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1479 * it on the socket. 1480 */ 1481 for (off = offset; ; off += xfsize, *sbytes += xfsize + hbytes) { 1482 vm_pindex_t pindex; 1483 vm_offset_t pgoff; 1484 1485 pindex = OFF_TO_IDX(off); 1486 retry_lookup: 1487 /* 1488 * Calculate the amount to transfer. Not to exceed a page, 1489 * the EOF, or the passed in nbytes. 1490 */ 1491 xfsize = obj->un_pager.vnp.vnp_size - off; 1492 if (xfsize > PAGE_SIZE) 1493 xfsize = PAGE_SIZE; 1494 pgoff = (vm_offset_t)(off & PAGE_MASK); 1495 if (PAGE_SIZE - pgoff < xfsize) 1496 xfsize = PAGE_SIZE - pgoff; 1497 if (nbytes && xfsize > (nbytes - *sbytes)) 1498 xfsize = nbytes - *sbytes; 1499 if (xfsize <= 0) 1500 break; 1501 /* 1502 * Optimize the non-blocking case by looking at the socket space 1503 * before going to the extra work of constituting the sf_buf. 1504 */ 1505 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1506 if (so->so_state & SS_CANTSENDMORE) 1507 error = EPIPE; 1508 else 1509 error = EAGAIN; 1510 sbunlock(&so->so_snd); 1511 goto done; 1512 } 1513 /* 1514 * Attempt to look up the page. 1515 * 1516 * Allocate if not found, wait and loop if busy, then 1517 * wire the page. critical section protection is 1518 * required to maintain the object association (an 1519 * interrupt can free the page) through to the 1520 * vm_page_wire() call. 1521 */ 1522 crit_enter(); 1523 pg = vm_page_lookup(obj, pindex); 1524 if (pg == NULL) { 1525 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1526 if (pg == NULL) { 1527 vm_wait(); 1528 crit_exit(); 1529 goto retry_lookup; 1530 } 1531 vm_page_wakeup(pg); 1532 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1533 crit_exit(); 1534 goto retry_lookup; 1535 } 1536 vm_page_wire(pg); 1537 crit_exit(); 1538 1539 /* 1540 * If page is not valid for what we need, initiate I/O 1541 */ 1542 1543 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1544 struct uio auio; 1545 struct iovec aiov; 1546 int bsize; 1547 1548 /* 1549 * Ensure that our page is still around when the I/O 1550 * completes. 1551 */ 1552 vm_page_io_start(pg); 1553 1554 /* 1555 * Get the page from backing store. 1556 */ 1557 bsize = vp->v_mount->mnt_stat.f_iosize; 1558 auio.uio_iov = &aiov; 1559 auio.uio_iovcnt = 1; 1560 aiov.iov_base = 0; 1561 aiov.iov_len = MAXBSIZE; 1562 auio.uio_resid = MAXBSIZE; 1563 auio.uio_offset = trunc_page(off); 1564 auio.uio_segflg = UIO_NOCOPY; 1565 auio.uio_rw = UIO_READ; 1566 auio.uio_td = td; 1567 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1568 error = VOP_READ(vp, &auio, 1569 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1570 p->p_ucred); 1571 VOP_UNLOCK(vp, 0, td); 1572 vm_page_flag_clear(pg, PG_ZERO); 1573 vm_page_io_finish(pg); 1574 if (error) { 1575 crit_enter(); 1576 vm_page_unwire(pg, 0); 1577 vm_page_try_to_free(pg); 1578 crit_exit(); 1579 sbunlock(&so->so_snd); 1580 goto done; 1581 } 1582 } 1583 1584 1585 /* 1586 * Get a sendfile buf. We usually wait as long as necessary, 1587 * but this wait can be interrupted. 1588 */ 1589 if ((sf = sf_buf_alloc(pg, SFBA_PCATCH)) == NULL) { 1590 crit_enter(); 1591 vm_page_unwire(pg, 0); 1592 vm_page_try_to_free(pg); 1593 crit_exit(); 1594 sbunlock(&so->so_snd); 1595 error = EINTR; 1596 goto done; 1597 } 1598 1599 /* 1600 * Get an mbuf header and set it up as having external storage. 1601 */ 1602 MGETHDR(m, MB_WAIT, MT_DATA); 1603 if (m == NULL) { 1604 error = ENOBUFS; 1605 sf_buf_free(sf); 1606 sbunlock(&so->so_snd); 1607 goto done; 1608 } 1609 1610 /* 1611 * sfm is a temporary hack, use a per-cpu cache for this. 1612 */ 1613 sfm = malloc(sizeof(struct sfbuf_mref), M_SENDFILE, M_WAITOK); 1614 sfm->sf = sf; 1615 sfm->mref_count = 1; 1616 1617 m->m_ext.ext_nfree.new = sf_buf_mfree; 1618 m->m_ext.ext_nref.new = sf_buf_mref; 1619 m->m_ext.ext_arg = sfm; 1620 m->m_ext.ext_buf = (void *)sf->kva; 1621 m->m_ext.ext_size = PAGE_SIZE; 1622 m->m_data = (char *) sf->kva + pgoff; 1623 m->m_flags |= M_EXT; 1624 m->m_pkthdr.len = m->m_len = xfsize; 1625 KKASSERT((m->m_flags & (M_EXT_OLD|M_EXT_CLUSTER)) == 0); 1626 1627 if (mheader != NULL) { 1628 hbytes = mheader->m_pkthdr.len; 1629 mheader->m_pkthdr.len += m->m_pkthdr.len; 1630 m_cat(mheader, m); 1631 m = mheader; 1632 mheader = NULL; 1633 } else 1634 hbytes = 0; 1635 1636 /* 1637 * Add the buffer to the socket buffer chain. 1638 */ 1639 crit_enter(); 1640 retry_space: 1641 /* 1642 * Make sure that the socket is still able to take more data. 1643 * CANTSENDMORE being true usually means that the connection 1644 * was closed. so_error is true when an error was sensed after 1645 * a previous send. 1646 * The state is checked after the page mapping and buffer 1647 * allocation above since those operations may block and make 1648 * any socket checks stale. From this point forward, nothing 1649 * blocks before the pru_send (or more accurately, any blocking 1650 * results in a loop back to here to re-check). 1651 */ 1652 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1653 if (so->so_state & SS_CANTSENDMORE) { 1654 error = EPIPE; 1655 } else { 1656 error = so->so_error; 1657 so->so_error = 0; 1658 } 1659 m_freem(m); 1660 sbunlock(&so->so_snd); 1661 crit_exit(); 1662 goto done; 1663 } 1664 /* 1665 * Wait for socket space to become available. We do this just 1666 * after checking the connection state above in order to avoid 1667 * a race condition with sbwait(). 1668 */ 1669 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1670 if (so->so_state & SS_NBIO) { 1671 m_freem(m); 1672 sbunlock(&so->so_snd); 1673 crit_exit(); 1674 error = EAGAIN; 1675 goto done; 1676 } 1677 error = sbwait(&so->so_snd); 1678 /* 1679 * An error from sbwait usually indicates that we've 1680 * been interrupted by a signal. If we've sent anything 1681 * then return bytes sent, otherwise return the error. 1682 */ 1683 if (error) { 1684 m_freem(m); 1685 sbunlock(&so->so_snd); 1686 crit_exit(); 1687 goto done; 1688 } 1689 goto retry_space; 1690 } 1691 error = so_pru_send(so, 0, m, NULL, NULL, td); 1692 crit_exit(); 1693 if (error) { 1694 sbunlock(&so->so_snd); 1695 goto done; 1696 } 1697 } 1698 if (mheader != NULL) { 1699 *sbytes += mheader->m_pkthdr.len; 1700 error = so_pru_send(so, 0, mheader, NULL, NULL, td); 1701 mheader = NULL; 1702 } 1703 sbunlock(&so->so_snd); 1704 1705 done: 1706 if (fp) 1707 fdrop(fp, td); 1708 if (mheader != NULL) 1709 m_freem(mheader); 1710 return (error); 1711 } 1712