1 /* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.65.2.17 2003/04/04 17:11:16 tegge Exp $ 38 * $DragonFly: src/sys/kern/uipc_syscalls.c,v 1.38 2004/06/06 05:59:44 hsu Exp $ 39 */ 40 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/sysproto.h> 47 #include <sys/malloc.h> 48 #include <sys/filedesc.h> 49 #include <sys/event.h> 50 #include <sys/proc.h> 51 #include <sys/fcntl.h> 52 #include <sys/file.h> 53 #include <sys/filio.h> 54 #include <sys/kern_syscall.h> 55 #include <sys/mbuf.h> 56 #include <sys/protosw.h> 57 #include <sys/sfbuf.h> 58 #include <sys/socket.h> 59 #include <sys/socketvar.h> 60 #include <sys/socketops.h> 61 #include <sys/uio.h> 62 #include <sys/vnode.h> 63 #include <sys/lock.h> 64 #include <sys/mount.h> 65 #ifdef KTRACE 66 #include <sys/ktrace.h> 67 #endif 68 #include <vm/vm.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_page.h> 71 #include <vm/vm_pageout.h> 72 #include <vm/vm_kern.h> 73 #include <vm/vm_extern.h> 74 #include <sys/file2.h> 75 #include <sys/signalvar.h> 76 77 #include <sys/thread2.h> 78 #include <sys/msgport2.h> 79 80 /* 81 * System call interface to the socket abstraction. 82 */ 83 84 extern struct fileops socketops; 85 86 /* 87 * socket_args(int domain, int type, int protocol) 88 */ 89 int 90 kern_socket(int domain, int type, int protocol, int *res) 91 { 92 struct thread *td = curthread; 93 struct proc *p = td->td_proc; 94 struct filedesc *fdp; 95 struct socket *so; 96 struct file *fp; 97 int fd, error; 98 99 KKASSERT(p); 100 fdp = p->p_fd; 101 102 error = falloc(p, &fp, &fd); 103 if (error) 104 return (error); 105 fhold(fp); 106 error = socreate(domain, &so, type, protocol, td); 107 if (error) { 108 if (fdp->fd_ofiles[fd] == fp) { 109 fdp->fd_ofiles[fd] = NULL; 110 fdrop(fp, td); 111 } 112 } else { 113 fp->f_data = (caddr_t)so; 114 fp->f_flag = FREAD|FWRITE; 115 fp->f_ops = &socketops; 116 fp->f_type = DTYPE_SOCKET; 117 *res = fd; 118 } 119 fdrop(fp, td); 120 return (error); 121 } 122 123 int 124 socket(struct socket_args *uap) 125 { 126 int error; 127 128 error = kern_socket(uap->domain, uap->type, uap->protocol, 129 &uap->sysmsg_result); 130 131 return (error); 132 } 133 134 int 135 kern_bind(int s, struct sockaddr *sa) 136 { 137 struct thread *td = curthread; 138 struct proc *p = td->td_proc; 139 struct file *fp; 140 int error; 141 142 KKASSERT(p); 143 error = holdsock(p->p_fd, s, &fp); 144 if (error) 145 return (error); 146 error = sobind((struct socket *)fp->f_data, sa, td); 147 fdrop(fp, td); 148 return (error); 149 } 150 151 /* 152 * bind_args(int s, caddr_t name, int namelen) 153 */ 154 int 155 bind(struct bind_args *uap) 156 { 157 struct sockaddr *sa; 158 int error; 159 160 error = getsockaddr(&sa, uap->name, uap->namelen); 161 if (error) 162 return (error); 163 error = kern_bind(uap->s, sa); 164 FREE(sa, M_SONAME); 165 166 return (error); 167 } 168 169 int 170 kern_listen(int s, int backlog) 171 { 172 struct thread *td = curthread; 173 struct proc *p = td->td_proc; 174 struct file *fp; 175 int error; 176 177 KKASSERT(p); 178 error = holdsock(p->p_fd, s, &fp); 179 if (error) 180 return (error); 181 error = solisten((struct socket *)fp->f_data, backlog, td); 182 fdrop(fp, td); 183 return(error); 184 } 185 186 /* 187 * listen_args(int s, int backlog) 188 */ 189 int 190 listen(struct listen_args *uap) 191 { 192 int error; 193 194 error = kern_listen(uap->s, uap->backlog); 195 return (error); 196 } 197 198 /* 199 * Returns the accepted socket as well. 200 */ 201 static boolean_t 202 soaccept_predicate(struct netmsg *msg0) 203 { 204 struct netmsg_so_notify *msg = (struct netmsg_so_notify *)msg0; 205 struct socket *head = msg->nm_so; 206 207 if (head->so_error != 0) { 208 msg->nm_lmsg.ms_error = head->so_error; 209 return (TRUE); 210 } 211 if (!TAILQ_EMPTY(&head->so_comp)) { 212 /* Abuse nm_so field as copy in/copy out parameter. XXX JH */ 213 msg->nm_so = TAILQ_FIRST(&head->so_comp); 214 TAILQ_REMOVE(&head->so_comp, msg->nm_so, so_list); 215 head->so_qlen--; 216 217 msg->nm_lmsg.ms_error = 0; 218 return (TRUE); 219 } 220 if (head->so_state & SS_CANTRCVMORE) { 221 msg->nm_lmsg.ms_error = ECONNABORTED; 222 return (TRUE); 223 } 224 if (head->so_state & SS_NBIO) { 225 msg->nm_lmsg.ms_error = EWOULDBLOCK; 226 return (TRUE); 227 } 228 229 return (FALSE); 230 } 231 232 /* 233 * The second argument to kern_accept() is a handle to a struct sockaddr. 234 * This allows kern_accept() to return a pointer to an allocated struct 235 * sockaddr which must be freed later with FREE(). The caller must 236 * initialize *name to NULL. 237 */ 238 int 239 kern_accept(int s, struct sockaddr **name, int *namelen, int *res) 240 { 241 struct thread *td = curthread; 242 struct proc *p = td->td_proc; 243 struct filedesc *fdp = p->p_fd; 244 struct file *lfp = NULL; 245 struct file *nfp = NULL; 246 struct sockaddr *sa; 247 struct socket *head, *so; 248 struct netmsg_so_notify msg; 249 lwkt_port_t port; 250 int fd; 251 u_int fflag; /* type must match fp->f_flag */ 252 int error, tmp; 253 254 if (name && namelen && *namelen < 0) 255 return (EINVAL); 256 257 error = holdsock(fdp, s, &lfp); 258 if (error) 259 return (error); 260 261 error = falloc(p, &nfp, &fd); 262 if (error) { /* Probably ran out of file descriptors. */ 263 *res = -1; 264 fdrop(lfp, td); 265 return (error); 266 } 267 fhold(nfp); 268 *res = fd; 269 270 head = (struct socket *)lfp->f_data; 271 if ((head->so_options & SO_ACCEPTCONN) == 0) { 272 error = EINVAL; 273 goto done; 274 } 275 276 /* optimize for uniprocessor case later XXX JH */ 277 port = head->so_proto->pr_mport(head, NULL, PRU_PRED); 278 lwkt_initmsg(&msg.nm_lmsg, &curthread->td_msgport, 279 MSGF_PCATCH | MSGF_ABORTABLE, 280 lwkt_cmd_func(netmsg_so_notify), 281 lwkt_cmd_func(netmsg_so_notify_abort)); 282 msg.nm_predicate = soaccept_predicate; 283 msg.nm_so = head; 284 msg.nm_etype = NM_REVENT; 285 error = lwkt_domsg(port, &msg.nm_lmsg); 286 if (error) 287 goto done; 288 289 /* 290 * At this point we have the connection that's ready to be accepted. 291 */ 292 so = msg.nm_so; 293 294 fflag = lfp->f_flag; 295 296 /* connection has been removed from the listen queue */ 297 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 298 299 so->so_state &= ~SS_COMP; 300 so->so_head = NULL; 301 if (head->so_sigio != NULL) 302 fsetown(fgetown(head->so_sigio), &so->so_sigio); 303 304 nfp->f_data = (caddr_t)so; 305 nfp->f_flag = fflag; 306 nfp->f_ops = &socketops; 307 nfp->f_type = DTYPE_SOCKET; 308 /* Sync socket nonblocking/async state with file flags */ 309 tmp = fflag & FNONBLOCK; 310 (void) fo_ioctl(nfp, FIONBIO, (caddr_t)&tmp, td); 311 tmp = fflag & FASYNC; 312 (void) fo_ioctl(nfp, FIOASYNC, (caddr_t)&tmp, td); 313 314 sa = NULL; 315 error = soaccept(so, &sa); 316 317 /* 318 * Set the returned name and namelen as applicable. Set the returned 319 * namelen to 0 for older code which might ignore the return value 320 * from accept. 321 */ 322 if (error == 0) { 323 if (sa && name && namelen) { 324 if (*namelen > sa->sa_len) 325 *namelen = sa->sa_len; 326 *name = sa; 327 } else { 328 if (sa) 329 FREE(sa, M_SONAME); 330 } 331 } 332 333 done: 334 /* 335 * close the new descriptor, assuming someone hasn't ripped it 336 * out from under us. Note that *res is normally ignored if an 337 * error is returned but a syscall message will still have access 338 * to the result code. 339 */ 340 if (error) { 341 *res = -1; 342 if (fdp->fd_ofiles[fd] == nfp) { 343 fdp->fd_ofiles[fd] = NULL; 344 fdrop(nfp, td); 345 } 346 } 347 348 /* 349 * Release explicitly held references before returning. 350 */ 351 if (nfp != NULL) 352 fdrop(nfp, td); 353 fdrop(lfp, td); 354 return (error); 355 } 356 357 /* 358 * accept_args(int s, caddr_t name, int *anamelen) 359 */ 360 int 361 accept(struct accept_args *uap) 362 { 363 struct sockaddr *sa = NULL; 364 int sa_len; 365 int error; 366 367 if (uap->name) { 368 error = copyin(uap->anamelen, &sa_len, sizeof(sa_len)); 369 if (error) 370 return (error); 371 372 error = kern_accept(uap->s, &sa, &sa_len, &uap->sysmsg_result); 373 374 if (error == 0) 375 error = copyout(sa, uap->name, sa_len); 376 if (error == 0) { 377 error = copyout(&sa_len, uap->anamelen, 378 sizeof(*uap->anamelen)); 379 } 380 if (sa) 381 FREE(sa, M_SONAME); 382 } else { 383 error = kern_accept(uap->s, NULL, 0, &uap->sysmsg_result); 384 } 385 return (error); 386 } 387 388 /* 389 * Returns TRUE if predicate satisfied. 390 */ 391 static boolean_t 392 soconnected_predicate(struct netmsg *msg0) 393 { 394 struct netmsg_so_notify *msg = (struct netmsg_so_notify *)msg0; 395 struct socket *so = msg->nm_so; 396 397 /* check predicate */ 398 if (!(so->so_state & SS_ISCONNECTING) || so->so_error != 0) { 399 msg->nm_lmsg.ms_error = so->so_error; 400 return (TRUE); 401 } 402 403 return (FALSE); 404 } 405 406 int 407 kern_connect(int s, struct sockaddr *sa) 408 { 409 struct thread *td = curthread; 410 struct proc *p = td->td_proc; 411 struct file *fp; 412 struct socket *so; 413 int error; 414 415 error = holdsock(p->p_fd, s, &fp); 416 if (error) 417 return (error); 418 so = (struct socket *)fp->f_data; 419 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 420 error = EALREADY; 421 goto done; 422 } 423 error = soconnect(so, sa, td); 424 if (error) 425 goto bad; 426 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 427 error = EINPROGRESS; 428 goto done; 429 } 430 if ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 431 struct netmsg_so_notify msg; 432 lwkt_port_t port; 433 434 port = so->so_proto->pr_mport(so, sa, PRU_PRED); 435 lwkt_initmsg(&msg.nm_lmsg, 436 &curthread->td_msgport, 437 MSGF_PCATCH | MSGF_ABORTABLE, 438 lwkt_cmd_func(netmsg_so_notify), 439 lwkt_cmd_func(netmsg_so_notify_abort)); 440 msg.nm_predicate = soconnected_predicate; 441 msg.nm_so = so; 442 msg.nm_etype = NM_REVENT; 443 error = lwkt_domsg(port, &msg.nm_lmsg); 444 } 445 if (error == 0) { 446 error = so->so_error; 447 so->so_error = 0; 448 } 449 bad: 450 so->so_state &= ~SS_ISCONNECTING; 451 if (error == ERESTART) 452 error = EINTR; 453 done: 454 fdrop(fp, td); 455 return (error); 456 } 457 458 /* 459 * connect_args(int s, caddr_t name, int namelen) 460 */ 461 int 462 connect(struct connect_args *uap) 463 { 464 struct sockaddr *sa; 465 int error; 466 467 error = getsockaddr(&sa, uap->name, uap->namelen); 468 if (error) 469 return (error); 470 error = kern_connect(uap->s, sa); 471 FREE(sa, M_SONAME); 472 473 return (error); 474 } 475 476 int 477 kern_socketpair(int domain, int type, int protocol, int *sv) 478 { 479 struct thread *td = curthread; 480 struct proc *p = td->td_proc; 481 struct filedesc *fdp; 482 struct file *fp1, *fp2; 483 struct socket *so1, *so2; 484 int fd, error; 485 486 KKASSERT(p); 487 fdp = p->p_fd; 488 error = socreate(domain, &so1, type, protocol, td); 489 if (error) 490 return (error); 491 error = socreate(domain, &so2, type, protocol, td); 492 if (error) 493 goto free1; 494 error = falloc(p, &fp1, &fd); 495 if (error) 496 goto free2; 497 fhold(fp1); 498 sv[0] = fd; 499 fp1->f_data = (caddr_t)so1; 500 error = falloc(p, &fp2, &fd); 501 if (error) 502 goto free3; 503 fhold(fp2); 504 fp2->f_data = (caddr_t)so2; 505 sv[1] = fd; 506 error = soconnect2(so1, so2); 507 if (error) 508 goto free4; 509 if (type == SOCK_DGRAM) { 510 /* 511 * Datagram socket connection is asymmetric. 512 */ 513 error = soconnect2(so2, so1); 514 if (error) 515 goto free4; 516 } 517 fp1->f_flag = fp2->f_flag = FREAD|FWRITE; 518 fp1->f_ops = fp2->f_ops = &socketops; 519 fp1->f_type = fp2->f_type = DTYPE_SOCKET; 520 fdrop(fp1, td); 521 fdrop(fp2, td); 522 return (error); 523 free4: 524 if (fdp->fd_ofiles[sv[1]] == fp2) { 525 fdp->fd_ofiles[sv[1]] = NULL; 526 fdrop(fp2, td); 527 } 528 fdrop(fp2, td); 529 free3: 530 if (fdp->fd_ofiles[sv[0]] == fp1) { 531 fdp->fd_ofiles[sv[0]] = NULL; 532 fdrop(fp1, td); 533 } 534 fdrop(fp1, td); 535 free2: 536 (void)soclose(so2); 537 free1: 538 (void)soclose(so1); 539 return (error); 540 } 541 542 /* 543 * socketpair(int domain, int type, int protocol, int *rsv) 544 */ 545 int 546 socketpair(struct socketpair_args *uap) 547 { 548 int error, sockv[2]; 549 550 error = kern_socketpair(uap->domain, uap->type, uap->protocol, sockv); 551 552 if (error == 0) 553 error = copyout(sockv, uap->rsv, sizeof(sockv)); 554 return (error); 555 } 556 557 int 558 kern_sendmsg(int s, struct sockaddr *sa, struct uio *auio, 559 struct mbuf *control, int flags, int *res) 560 { 561 struct thread *td = curthread; 562 struct proc *p = td->td_proc; 563 struct file *fp; 564 int len, error; 565 struct socket *so; 566 #ifdef KTRACE 567 struct iovec *ktriov = NULL; 568 struct uio ktruio; 569 #endif 570 571 error = holdsock(p->p_fd, s, &fp); 572 if (error) 573 return (error); 574 if (auio->uio_resid < 0) { 575 error = EINVAL; 576 goto done; 577 } 578 #ifdef KTRACE 579 if (KTRPOINT(td, KTR_GENIO)) { 580 int iovlen = auio->uio_iovcnt * sizeof (struct iovec); 581 582 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 583 bcopy((caddr_t)auio->uio_iov, (caddr_t)ktriov, iovlen); 584 ktruio = *auio; 585 } 586 #endif 587 len = auio->uio_resid; 588 so = (struct socket *)fp->f_data; 589 error = so_pru_sosend(so, sa, auio, NULL, control, flags, td); 590 if (error) { 591 if (auio->uio_resid != len && (error == ERESTART || 592 error == EINTR || error == EWOULDBLOCK)) 593 error = 0; 594 if (error == EPIPE) 595 psignal(p, SIGPIPE); 596 } 597 #ifdef KTRACE 598 if (ktriov != NULL) { 599 if (error == 0) { 600 ktruio.uio_iov = ktriov; 601 ktruio.uio_resid = len - auio->uio_resid; 602 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error); 603 } 604 FREE(ktriov, M_TEMP); 605 } 606 #endif 607 if (error == 0) 608 *res = len - auio->uio_resid; 609 done: 610 fdrop(fp, td); 611 return (error); 612 } 613 614 /* 615 * sendto_args(int s, caddr_t buf, size_t len, int flags, caddr_t to, int tolen) 616 */ 617 int 618 sendto(struct sendto_args *uap) 619 { 620 struct thread *td = curthread; 621 struct uio auio; 622 struct iovec aiov; 623 struct sockaddr *sa = NULL; 624 int error; 625 626 if (uap->to) { 627 error = getsockaddr(&sa, uap->to, uap->tolen); 628 if (error) 629 return (error); 630 } 631 aiov.iov_base = uap->buf; 632 aiov.iov_len = uap->len; 633 auio.uio_iov = &aiov; 634 auio.uio_iovcnt = 1; 635 auio.uio_offset = 0; 636 auio.uio_resid = uap->len; 637 auio.uio_segflg = UIO_USERSPACE; 638 auio.uio_rw = UIO_WRITE; 639 auio.uio_td = td; 640 641 error = kern_sendmsg(uap->s, sa, &auio, NULL, uap->flags, 642 &uap->sysmsg_result); 643 644 if (sa) 645 FREE(sa, M_SONAME); 646 return (error); 647 } 648 649 /* 650 * sendmsg_args(int s, caddr_t msg, int flags) 651 */ 652 int 653 sendmsg(struct sendmsg_args *uap) 654 { 655 struct thread *td = curthread; 656 struct msghdr msg; 657 struct uio auio; 658 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 659 struct sockaddr *sa = NULL; 660 struct mbuf *control = NULL; 661 int error; 662 663 error = copyin(uap->msg, (caddr_t)&msg, sizeof(msg)); 664 if (error) 665 return (error); 666 667 /* 668 * Conditionally copyin msg.msg_name. 669 */ 670 if (msg.msg_name) { 671 error = getsockaddr(&sa, msg.msg_name, msg.msg_namelen); 672 if (error) 673 return (error); 674 } 675 676 /* 677 * Populate auio. 678 */ 679 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen, 680 &auio.uio_resid); 681 if (error) 682 goto cleanup; 683 auio.uio_iov = iov; 684 auio.uio_iovcnt = msg.msg_iovlen; 685 auio.uio_offset = 0; 686 auio.uio_segflg = UIO_USERSPACE; 687 auio.uio_rw = UIO_WRITE; 688 auio.uio_td = td; 689 690 /* 691 * Conditionally copyin msg.msg_control. 692 */ 693 if (msg.msg_control) { 694 if (msg.msg_controllen < sizeof(struct cmsghdr) || 695 msg.msg_controllen > MLEN) { 696 error = EINVAL; 697 goto cleanup; 698 } 699 control = m_get(MB_WAIT, MT_CONTROL); 700 if (control == NULL) { 701 error = ENOBUFS; 702 goto cleanup; 703 } 704 control->m_len = msg.msg_controllen; 705 error = copyin(msg.msg_control, mtod(control, caddr_t), 706 msg.msg_controllen); 707 if (error) { 708 m_free(control); 709 goto cleanup; 710 } 711 } 712 713 error = kern_sendmsg(uap->s, sa, &auio, control, uap->flags, 714 &uap->sysmsg_result); 715 716 cleanup: 717 if (sa) 718 FREE(sa, M_SONAME); 719 iovec_free(&iov, aiov); 720 return (error); 721 } 722 723 /* 724 * kern_recvmsg() takes a handle to sa and control. If the handle is non- 725 * null, it returns a dynamically allocated struct sockaddr and an mbuf. 726 * Don't forget to FREE() and m_free() these if they are returned. 727 */ 728 int 729 kern_recvmsg(int s, struct sockaddr **sa, struct uio *auio, 730 struct mbuf **control, int *flags, int *res) 731 { 732 struct thread *td = curthread; 733 struct proc *p = td->td_proc; 734 struct file *fp; 735 int len, error; 736 struct socket *so; 737 #ifdef KTRACE 738 struct iovec *ktriov = NULL; 739 struct uio ktruio; 740 #endif 741 742 error = holdsock(p->p_fd, s, &fp); 743 if (error) 744 return (error); 745 if (auio->uio_resid < 0) { 746 error = EINVAL; 747 goto done; 748 } 749 #ifdef KTRACE 750 if (KTRPOINT(td, KTR_GENIO)) { 751 int iovlen = auio->uio_iovcnt * sizeof (struct iovec); 752 753 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 754 bcopy(auio->uio_iov, ktriov, iovlen); 755 ktruio = *auio; 756 } 757 #endif 758 len = auio->uio_resid; 759 so = (struct socket *)fp->f_data; 760 error = so_pru_soreceive(so, sa, auio, NULL, control, flags); 761 if (error) { 762 if (auio->uio_resid != len && (error == ERESTART || 763 error == EINTR || error == EWOULDBLOCK)) 764 error = 0; 765 } 766 #ifdef KTRACE 767 if (ktriov != NULL) { 768 if (error == 0) { 769 ktruio.uio_iov = ktriov; 770 ktruio.uio_resid = len - auio->uio_resid; 771 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error); 772 } 773 FREE(ktriov, M_TEMP); 774 } 775 #endif 776 if (error == 0) 777 *res = len - auio->uio_resid; 778 done: 779 fdrop(fp, td); 780 return (error); 781 } 782 783 /* 784 * recvfrom_args(int s, caddr_t buf, size_t len, int flags, 785 * caddr_t from, int *fromlenaddr) 786 */ 787 int 788 recvfrom(struct recvfrom_args *uap) 789 { 790 struct thread *td = curthread; 791 struct uio auio; 792 struct iovec aiov; 793 struct sockaddr *sa = NULL; 794 int error, fromlen; 795 796 if (uap->from && uap->fromlenaddr) { 797 error = copyin(uap->fromlenaddr, &fromlen, sizeof(fromlen)); 798 if (error) 799 return (error); 800 if (fromlen < 0) 801 return (EINVAL); 802 } else { 803 fromlen = 0; 804 } 805 aiov.iov_base = uap->buf; 806 aiov.iov_len = uap->len; 807 auio.uio_iov = &aiov; 808 auio.uio_iovcnt = 1; 809 auio.uio_offset = 0; 810 auio.uio_resid = uap->len; 811 auio.uio_segflg = UIO_USERSPACE; 812 auio.uio_rw = UIO_READ; 813 auio.uio_td = td; 814 815 error = kern_recvmsg(uap->s, uap->from ? &sa : NULL, &auio, NULL, 816 &uap->flags, &uap->sysmsg_result); 817 818 if (error == 0 && uap->from) { 819 /* note: sa may still be NULL */ 820 if (sa) { 821 fromlen = MIN(fromlen, sa->sa_len); 822 error = copyout(sa, uap->from, fromlen); 823 } else { 824 fromlen = 0; 825 } 826 if (error == 0) { 827 error = copyout(&fromlen, uap->fromlenaddr, 828 sizeof(fromlen)); 829 } 830 } 831 if (sa) 832 FREE(sa, M_SONAME); 833 834 return (error); 835 } 836 837 /* 838 * recvmsg_args(int s, struct msghdr *msg, int flags) 839 */ 840 int 841 recvmsg(struct recvmsg_args *uap) 842 { 843 struct thread *td = curthread; 844 struct msghdr msg; 845 struct uio auio; 846 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 847 struct mbuf *m, *control = NULL; 848 struct sockaddr *sa = NULL; 849 caddr_t ctlbuf; 850 socklen_t *ufromlenp, *ucontrollenp; 851 int error, fromlen, controllen, len, flags, *uflagsp; 852 853 /* 854 * This copyin handles everything except the iovec. 855 */ 856 error = copyin(uap->msg, &msg, sizeof(msg)); 857 if (error) 858 return (error); 859 860 if (msg.msg_name && msg.msg_namelen < 0) 861 return (EINVAL); 862 if (msg.msg_control && msg.msg_controllen < 0) 863 return (EINVAL); 864 865 ufromlenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr, 866 msg_namelen)); 867 ucontrollenp = (socklen_t *)((caddr_t)uap->msg + offsetof(struct msghdr, 868 msg_controllen)); 869 uflagsp = (int *)((caddr_t)uap->msg + offsetof(struct msghdr, 870 msg_flags)); 871 872 /* 873 * Populate auio. 874 */ 875 error = iovec_copyin(msg.msg_iov, &iov, aiov, msg.msg_iovlen, 876 &auio.uio_resid); 877 if (error) 878 return (error); 879 auio.uio_iov = iov; 880 auio.uio_iovcnt = msg.msg_iovlen; 881 auio.uio_offset = 0; 882 auio.uio_segflg = UIO_USERSPACE; 883 auio.uio_rw = UIO_READ; 884 auio.uio_td = td; 885 886 flags = msg.msg_flags; 887 888 error = kern_recvmsg(uap->s, msg.msg_name ? &sa : NULL, &auio, 889 msg.msg_control ? &control : NULL, &flags, &uap->sysmsg_result); 890 891 /* 892 * Conditionally copyout the name and populate the namelen field. 893 */ 894 if (error == 0 && msg.msg_name) { 895 fromlen = MIN(msg.msg_namelen, sa->sa_len); 896 error = copyout(sa, msg.msg_name, fromlen); 897 if (error == 0) 898 error = copyout(&fromlen, ufromlenp, 899 sizeof(*ufromlenp)); 900 } 901 902 /* 903 * Copyout msg.msg_control and msg.msg_controllen. 904 */ 905 if (error == 0 && msg.msg_control) { 906 len = msg.msg_controllen; 907 m = control; 908 ctlbuf = (caddr_t)msg.msg_control; 909 910 while(m && len > 0) { 911 unsigned int tocopy; 912 913 if (len >= m->m_len) { 914 tocopy = m->m_len; 915 } else { 916 msg.msg_flags |= MSG_CTRUNC; 917 tocopy = len; 918 } 919 920 error = copyout(mtod(m, caddr_t), ctlbuf, tocopy); 921 if (error) 922 goto cleanup; 923 924 ctlbuf += tocopy; 925 len -= tocopy; 926 m = m->m_next; 927 } 928 controllen = ctlbuf - (caddr_t)msg.msg_control; 929 error = copyout(&controllen, ucontrollenp, 930 sizeof(*ucontrollenp)); 931 } 932 933 if (error == 0) 934 error = copyout(&flags, uflagsp, sizeof(*uflagsp)); 935 936 cleanup: 937 if (sa) 938 FREE(sa, M_SONAME); 939 iovec_free(&iov, aiov); 940 if (control) 941 m_freem(control); 942 return (error); 943 } 944 945 /* 946 * shutdown_args(int s, int how) 947 */ 948 int 949 kern_shutdown(int s, int how) 950 { 951 struct thread *td = curthread; 952 struct proc *p = td->td_proc; 953 struct file *fp; 954 int error; 955 956 KKASSERT(p); 957 error = holdsock(p->p_fd, s, &fp); 958 if (error) 959 return (error); 960 error = soshutdown((struct socket *)fp->f_data, how); 961 fdrop(fp, td); 962 return(error); 963 } 964 965 int 966 shutdown(struct shutdown_args *uap) 967 { 968 int error; 969 970 error = kern_shutdown(uap->s, uap->how); 971 972 return (error); 973 } 974 975 /* 976 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an 977 * in kernel pointer instead of a userland pointer. This allows us 978 * to manipulate socket options in the emulation code. 979 */ 980 int 981 kern_setsockopt(int s, struct sockopt *sopt) 982 { 983 struct thread *td = curthread; 984 struct proc *p = td->td_proc; 985 struct file *fp; 986 int error; 987 988 if (sopt->sopt_val == 0 && sopt->sopt_valsize != 0) 989 return (EFAULT); 990 if (sopt->sopt_valsize < 0) 991 return (EINVAL); 992 993 error = holdsock(p->p_fd, s, &fp); 994 if (error) 995 return (error); 996 997 error = sosetopt((struct socket *)fp->f_data, sopt); 998 fdrop(fp, td); 999 return (error); 1000 } 1001 1002 /* 1003 * setsockopt_args(int s, int level, int name, caddr_t val, int valsize) 1004 */ 1005 int 1006 setsockopt(struct setsockopt_args *uap) 1007 { 1008 struct thread *td = curthread; 1009 struct sockopt sopt; 1010 int error; 1011 1012 sopt.sopt_dir = SOPT_SET; 1013 sopt.sopt_level = uap->level; 1014 sopt.sopt_name = uap->name; 1015 sopt.sopt_val = uap->val; 1016 sopt.sopt_valsize = uap->valsize; 1017 sopt.sopt_td = td; 1018 1019 error = kern_setsockopt(uap->s, &sopt); 1020 return(error); 1021 } 1022 1023 /* 1024 * If sopt->sopt_td == NULL, then sopt->sopt_val is treated as an 1025 * in kernel pointer instead of a userland pointer. This allows us 1026 * to manipulate socket options in the emulation code. 1027 */ 1028 int 1029 kern_getsockopt(int s, struct sockopt *sopt) 1030 { 1031 struct thread *td = curthread; 1032 struct proc *p = td->td_proc; 1033 struct file *fp; 1034 int error; 1035 1036 if (sopt->sopt_val == 0 && sopt->sopt_valsize != 0) 1037 return (EFAULT); 1038 if (sopt->sopt_valsize < 0) 1039 return (EINVAL); 1040 1041 error = holdsock(p->p_fd, s, &fp); 1042 if (error) 1043 return (error); 1044 1045 error = sogetopt((struct socket *)fp->f_data, sopt); 1046 fdrop(fp, td); 1047 return (error); 1048 } 1049 1050 /* 1051 * getsockopt_Args(int s, int level, int name, caddr_t val, int *avalsize) 1052 */ 1053 int 1054 getsockopt(struct getsockopt_args *uap) 1055 { 1056 struct thread *td = curthread; 1057 struct sockopt sopt; 1058 int error, valsize; 1059 1060 if (uap->val) { 1061 error = copyin(uap->avalsize, &valsize, sizeof(valsize)); 1062 if (error) 1063 return (error); 1064 if (valsize < 0) 1065 return (EINVAL); 1066 } else { 1067 valsize = 0; 1068 } 1069 1070 sopt.sopt_dir = SOPT_GET; 1071 sopt.sopt_level = uap->level; 1072 sopt.sopt_name = uap->name; 1073 sopt.sopt_val = uap->val; 1074 sopt.sopt_valsize = valsize; 1075 sopt.sopt_td = td; 1076 1077 error = kern_getsockopt(uap->s, &sopt); 1078 if (error == 0) { 1079 valsize = sopt.sopt_valsize; 1080 error = copyout(&valsize, uap->avalsize, sizeof(valsize)); 1081 } 1082 return (error); 1083 } 1084 1085 /* 1086 * The second argument to kern_getsockname() is a handle to a struct sockaddr. 1087 * This allows kern_getsockname() to return a pointer to an allocated struct 1088 * sockaddr which must be freed later with FREE(). The caller must 1089 * initialize *name to NULL. 1090 */ 1091 int 1092 kern_getsockname(int s, struct sockaddr **name, int *namelen) 1093 { 1094 struct thread *td = curthread; 1095 struct proc *p = td->td_proc; 1096 struct file *fp; 1097 struct socket *so; 1098 struct sockaddr *sa = NULL; 1099 int error; 1100 1101 error = holdsock(p->p_fd, s, &fp); 1102 if (error) 1103 return (error); 1104 if (*namelen < 0) { 1105 fdrop(fp, td); 1106 return (EINVAL); 1107 } 1108 so = (struct socket *)fp->f_data; 1109 error = so_pru_sockaddr(so, &sa); 1110 if (error == 0) { 1111 if (sa == 0) { 1112 *namelen = 0; 1113 } else { 1114 *namelen = MIN(*namelen, sa->sa_len); 1115 *name = sa; 1116 } 1117 } 1118 1119 fdrop(fp, td); 1120 return (error); 1121 } 1122 1123 /* 1124 * getsockname_args(int fdes, caddr_t asa, int *alen) 1125 * 1126 * Get socket name. 1127 */ 1128 int 1129 getsockname(struct getsockname_args *uap) 1130 { 1131 struct sockaddr *sa = NULL; 1132 int error, sa_len; 1133 1134 error = copyin(uap->alen, &sa_len, sizeof(sa_len)); 1135 if (error) 1136 return (error); 1137 1138 error = kern_getsockname(uap->fdes, &sa, &sa_len); 1139 1140 if (error == 0) 1141 error = copyout(sa, uap->asa, sa_len); 1142 if (error == 0) 1143 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen)); 1144 if (sa) 1145 FREE(sa, M_SONAME); 1146 return (error); 1147 } 1148 1149 /* 1150 * The second argument to kern_getpeername() is a handle to a struct sockaddr. 1151 * This allows kern_getpeername() to return a pointer to an allocated struct 1152 * sockaddr which must be freed later with FREE(). The caller must 1153 * initialize *name to NULL. 1154 */ 1155 int 1156 kern_getpeername(int s, struct sockaddr **name, int *namelen) 1157 { 1158 struct thread *td = curthread; 1159 struct proc *p = td->td_proc; 1160 struct file *fp; 1161 struct socket *so; 1162 struct sockaddr *sa = NULL; 1163 int error; 1164 1165 error = holdsock(p->p_fd, s, &fp); 1166 if (error) 1167 return (error); 1168 if (*namelen < 0) { 1169 fdrop(fp, td); 1170 return (EINVAL); 1171 } 1172 so = (struct socket *)fp->f_data; 1173 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1174 fdrop(fp, td); 1175 return (ENOTCONN); 1176 } 1177 error = so_pru_peeraddr(so, &sa); 1178 if (error == 0) { 1179 if (sa == 0) { 1180 *namelen = 0; 1181 } else { 1182 *namelen = MIN(*namelen, sa->sa_len); 1183 *name = sa; 1184 } 1185 } 1186 1187 fdrop(fp, td); 1188 return (error); 1189 } 1190 1191 /* 1192 * getpeername_args(int fdes, caddr_t asa, int *alen) 1193 * 1194 * Get name of peer for connected socket. 1195 */ 1196 int 1197 getpeername(struct getpeername_args *uap) 1198 { 1199 struct sockaddr *sa = NULL; 1200 int error, sa_len; 1201 1202 error = copyin(uap->alen, &sa_len, sizeof(sa_len)); 1203 if (error) 1204 return (error); 1205 1206 error = kern_getpeername(uap->fdes, &sa, &sa_len); 1207 1208 if (error == 0) 1209 error = copyout(sa, uap->asa, sa_len); 1210 if (error == 0) 1211 error = copyout(&sa_len, uap->alen, sizeof(*uap->alen)); 1212 if (sa) 1213 FREE(sa, M_SONAME); 1214 return (error); 1215 } 1216 1217 int 1218 getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len) 1219 { 1220 struct sockaddr *sa; 1221 int error; 1222 1223 *namp = NULL; 1224 if (len > SOCK_MAXADDRLEN) 1225 return ENAMETOOLONG; 1226 if (len < offsetof(struct sockaddr, sa_data[0])) 1227 return EDOM; 1228 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1229 error = copyin(uaddr, sa, len); 1230 if (error) { 1231 FREE(sa, M_SONAME); 1232 } else { 1233 #if BYTE_ORDER != BIG_ENDIAN 1234 /* 1235 * The bind(), connect(), and sendto() syscalls were not 1236 * versioned for COMPAT_43. Thus, this check must stay. 1237 */ 1238 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1239 sa->sa_family = sa->sa_len; 1240 #endif 1241 sa->sa_len = len; 1242 *namp = sa; 1243 } 1244 return error; 1245 } 1246 1247 /* 1248 * holdsock() - load the struct file pointer associated 1249 * with a socket into *fpp. If an error occurs, non-zero 1250 * will be returned and *fpp will be set to NULL. 1251 */ 1252 int 1253 holdsock(fdp, fdes, fpp) 1254 struct filedesc *fdp; 1255 int fdes; 1256 struct file **fpp; 1257 { 1258 struct file *fp = NULL; 1259 int error = 0; 1260 1261 if ((unsigned)fdes >= fdp->fd_nfiles || 1262 (fp = fdp->fd_ofiles[fdes]) == NULL) { 1263 error = EBADF; 1264 } else if (fp->f_type != DTYPE_SOCKET) { 1265 error = ENOTSOCK; 1266 fp = NULL; 1267 } else { 1268 fhold(fp); 1269 } 1270 *fpp = fp; 1271 return(error); 1272 } 1273 1274 /* 1275 * Detach a mapped page and release resources back to the system. 1276 * We must release our wiring and if the object is ripped out 1277 * from under the vm_page we become responsible for freeing the 1278 * page. 1279 * 1280 * XXX HACK XXX TEMPORARY UNTIL WE IMPLEMENT EXT MBUF REFERENCE COUNTING 1281 */ 1282 static void 1283 sf_buf_mref(caddr_t addr, u_int size) 1284 { 1285 struct sf_buf *sf; 1286 1287 sf = sf_buf_tosf(addr); 1288 ++sf->aux2; 1289 } 1290 1291 static void 1292 sf_buf_mext(caddr_t addr, u_int size) 1293 { 1294 struct sf_buf *sf; 1295 vm_page_t m; 1296 int s; 1297 int n; 1298 1299 sf = sf_buf_tosf(addr); 1300 KKASSERT(sf->aux2 > 0); 1301 if (--sf->aux2 == 0) { 1302 m = sf_buf_page(sf); 1303 n = sf->aux1; 1304 sf->aux1 = 0; 1305 sf_buf_free(sf); 1306 s = splvm(); 1307 while (n > 0) { 1308 --n; 1309 vm_page_unwire(m, 0); 1310 } 1311 if (m->wire_count == 0 && m->object == NULL) 1312 vm_page_free(m); 1313 splx(s); 1314 } 1315 } 1316 1317 /* 1318 * sendfile(2). 1319 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1320 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1321 * 1322 * Send a file specified by 'fd' and starting at 'offset' to a socket 1323 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1324 * nbytes == 0. Optionally add a header and/or trailer to the socket 1325 * output. If specified, write the total number of bytes sent into *sbytes. 1326 * 1327 * In FreeBSD kern/uipc_syscalls.c,v 1.103, a bug was fixed that caused 1328 * the headers to count against the remaining bytes to be sent from 1329 * the file descriptor. We may wish to implement a compatibility syscall 1330 * in the future. 1331 */ 1332 int 1333 sendfile(struct sendfile_args *uap) 1334 { 1335 struct thread *td = curthread; 1336 struct proc *p = td->td_proc; 1337 struct file *fp; 1338 struct filedesc *fdp; 1339 struct vnode *vp = NULL; 1340 struct sf_hdtr hdtr; 1341 struct iovec aiov[UIO_SMALLIOV], *iov = NULL; 1342 struct uio auio; 1343 struct mbuf *mheader = NULL; 1344 off_t hdtr_size = 0, sbytes; 1345 int error, hbytes = 0, tbytes; 1346 1347 KKASSERT(p); 1348 fdp = p->p_fd; 1349 1350 /* 1351 * Do argument checking. Must be a regular file in, stream 1352 * type and connected socket out, positive offset. 1353 */ 1354 fp = holdfp(fdp, uap->fd, FREAD); 1355 if (fp == NULL) { 1356 return (EBADF); 1357 } 1358 if (fp->f_type != DTYPE_VNODE) { 1359 fdrop(fp, td); 1360 return (EINVAL); 1361 } 1362 vp = (struct vnode *)fp->f_data; 1363 vref(vp); 1364 fdrop(fp, td); 1365 1366 /* 1367 * If specified, get the pointer to the sf_hdtr struct for 1368 * any headers/trailers. 1369 */ 1370 if (uap->hdtr) { 1371 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1372 if (error) 1373 goto done; 1374 /* 1375 * Send any headers. 1376 */ 1377 if (hdtr.headers) { 1378 error = iovec_copyin(hdtr.headers, &iov, aiov, 1379 hdtr.hdr_cnt, &hbytes); 1380 if (error) 1381 goto done; 1382 auio.uio_iov = iov; 1383 auio.uio_iovcnt = hdtr.hdr_cnt; 1384 auio.uio_offset = 0; 1385 auio.uio_segflg = UIO_USERSPACE; 1386 auio.uio_rw = UIO_WRITE; 1387 auio.uio_td = td; 1388 auio.uio_resid = hbytes; 1389 1390 mheader = m_uiomove(&auio, MB_WAIT, 0); 1391 1392 iovec_free(&iov, aiov); 1393 if (mheader == NULL) 1394 goto done; 1395 } 1396 } 1397 1398 error = kern_sendfile(vp, uap->s, uap->offset, uap->nbytes, mheader, 1399 &sbytes, uap->flags); 1400 if (error) 1401 goto done; 1402 1403 /* 1404 * Send trailers. Wimp out and use writev(2). 1405 */ 1406 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1407 error = iovec_copyin(hdtr.trailers, &iov, aiov, 1408 hdtr.trl_cnt, &auio.uio_resid); 1409 if (error) 1410 goto done; 1411 auio.uio_iov = iov; 1412 auio.uio_iovcnt = hdtr.trl_cnt; 1413 auio.uio_offset = 0; 1414 auio.uio_segflg = UIO_USERSPACE; 1415 auio.uio_rw = UIO_WRITE; 1416 auio.uio_td = td; 1417 1418 error = kern_sendmsg(uap->s, NULL, &auio, NULL, 0, &tbytes); 1419 1420 iovec_free(&iov, aiov); 1421 if (error) 1422 goto done; 1423 hdtr_size += tbytes; /* trailer bytes successfully sent */ 1424 } 1425 1426 done: 1427 if (uap->sbytes != NULL) { 1428 sbytes += hdtr_size; 1429 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1430 } 1431 if (vp) 1432 vrele(vp); 1433 return (error); 1434 } 1435 1436 int 1437 kern_sendfile(struct vnode *vp, int sfd, off_t offset, size_t nbytes, 1438 struct mbuf *mheader, off_t *sbytes, int flags) 1439 { 1440 struct thread *td = curthread; 1441 struct proc *p = td->td_proc; 1442 struct vm_object *obj; 1443 struct socket *so; 1444 struct file *fp; 1445 struct mbuf *m; 1446 struct sf_buf *sf; 1447 struct vm_page *pg; 1448 off_t off, xfsize; 1449 off_t hbytes = 0; 1450 int error = 0; 1451 int s; 1452 1453 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1454 error = EINVAL; 1455 goto done; 1456 } 1457 error = holdsock(p->p_fd, sfd, &fp); 1458 if (error) 1459 goto done; 1460 so = (struct socket *)fp->f_data; 1461 if (so->so_type != SOCK_STREAM) { 1462 error = EINVAL; 1463 goto done; 1464 } 1465 if ((so->so_state & SS_ISCONNECTED) == 0) { 1466 error = ENOTCONN; 1467 goto done; 1468 } 1469 if (offset < 0) { 1470 error = EINVAL; 1471 goto done; 1472 } 1473 1474 *sbytes = 0; 1475 /* 1476 * Protect against multiple writers to the socket. 1477 */ 1478 (void) sblock(&so->so_snd, M_WAITOK); 1479 1480 /* 1481 * Loop through the pages in the file, starting with the requested 1482 * offset. Get a file page (do I/O if necessary), map the file page 1483 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1484 * it on the socket. 1485 */ 1486 for (off = offset; ; off += xfsize, *sbytes += xfsize + hbytes) { 1487 vm_pindex_t pindex; 1488 vm_offset_t pgoff; 1489 1490 pindex = OFF_TO_IDX(off); 1491 retry_lookup: 1492 /* 1493 * Calculate the amount to transfer. Not to exceed a page, 1494 * the EOF, or the passed in nbytes. 1495 */ 1496 xfsize = obj->un_pager.vnp.vnp_size - off; 1497 if (xfsize > PAGE_SIZE) 1498 xfsize = PAGE_SIZE; 1499 pgoff = (vm_offset_t)(off & PAGE_MASK); 1500 if (PAGE_SIZE - pgoff < xfsize) 1501 xfsize = PAGE_SIZE - pgoff; 1502 if (nbytes && xfsize > (nbytes - *sbytes)) 1503 xfsize = nbytes - *sbytes; 1504 if (xfsize <= 0) 1505 break; 1506 /* 1507 * Optimize the non-blocking case by looking at the socket space 1508 * before going to the extra work of constituting the sf_buf. 1509 */ 1510 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1511 if (so->so_state & SS_CANTSENDMORE) 1512 error = EPIPE; 1513 else 1514 error = EAGAIN; 1515 sbunlock(&so->so_snd); 1516 goto done; 1517 } 1518 /* 1519 * Attempt to look up the page. 1520 * 1521 * Allocate if not found, wait and loop if busy, then 1522 * wire the page. splvm() protection is required to 1523 * maintain the object association (an interrupt can 1524 * free the page) through to the vm_page_wire() call. 1525 */ 1526 s = splvm(); 1527 pg = vm_page_lookup(obj, pindex); 1528 if (pg == NULL) { 1529 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL); 1530 if (pg == NULL) { 1531 vm_wait(); 1532 splx(s); 1533 goto retry_lookup; 1534 } 1535 vm_page_wakeup(pg); 1536 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) { 1537 splx(s); 1538 goto retry_lookup; 1539 } 1540 vm_page_wire(pg); 1541 splx(s); 1542 1543 /* 1544 * If page is not valid for what we need, initiate I/O 1545 */ 1546 1547 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1548 struct uio auio; 1549 struct iovec aiov; 1550 int bsize; 1551 1552 /* 1553 * Ensure that our page is still around when the I/O 1554 * completes. 1555 */ 1556 vm_page_io_start(pg); 1557 1558 /* 1559 * Get the page from backing store. 1560 */ 1561 bsize = vp->v_mount->mnt_stat.f_iosize; 1562 auio.uio_iov = &aiov; 1563 auio.uio_iovcnt = 1; 1564 aiov.iov_base = 0; 1565 aiov.iov_len = MAXBSIZE; 1566 auio.uio_resid = MAXBSIZE; 1567 auio.uio_offset = trunc_page(off); 1568 auio.uio_segflg = UIO_NOCOPY; 1569 auio.uio_rw = UIO_READ; 1570 auio.uio_td = td; 1571 vn_lock(vp, NULL, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1572 error = VOP_READ(vp, &auio, 1573 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1574 p->p_ucred); 1575 VOP_UNLOCK(vp, NULL, 0, td); 1576 vm_page_flag_clear(pg, PG_ZERO); 1577 vm_page_io_finish(pg); 1578 if (error) { 1579 vm_page_unwire(pg, 0); 1580 /* 1581 * See if anyone else might know about this page. 1582 * If not and it is not valid, then free it. 1583 */ 1584 if (pg->wire_count == 0 && pg->valid == 0 && 1585 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1586 pg->hold_count == 0) { 1587 vm_page_busy(pg); 1588 vm_page_free(pg); 1589 } 1590 sbunlock(&so->so_snd); 1591 goto done; 1592 } 1593 } 1594 1595 1596 /* 1597 * Get a sendfile buf. We usually wait as long as necessary, 1598 * but this wait can be interrupted. 1599 */ 1600 if ((sf = sf_buf_alloc(pg, SFBA_PCATCH)) == NULL) { 1601 s = splvm(); 1602 vm_page_unwire(pg, 0); 1603 if (pg->wire_count == 0 && pg->object == NULL) 1604 vm_page_free(pg); 1605 splx(s); 1606 sbunlock(&so->so_snd); 1607 error = EINTR; 1608 goto done; 1609 } 1610 1611 /* 1612 * Get an mbuf header and set it up as having external storage. 1613 */ 1614 MGETHDR(m, MB_WAIT, MT_DATA); 1615 if (m == NULL) { 1616 error = ENOBUFS; 1617 sf_buf_free(sf); 1618 sbunlock(&so->so_snd); 1619 goto done; 1620 } 1621 ++sf->aux1; /* wiring count */ 1622 ++sf->aux2; /* initial reference */ 1623 m->m_ext.ext_free = sf_buf_mext; 1624 m->m_ext.ext_ref = sf_buf_mref; 1625 m->m_ext.ext_buf = (void *)sf->kva; 1626 m->m_ext.ext_size = PAGE_SIZE; 1627 m->m_data = (char *) sf->kva + pgoff; 1628 m->m_flags |= M_EXT; 1629 m->m_pkthdr.len = m->m_len = xfsize; 1630 1631 if (mheader != NULL) { 1632 hbytes = mheader->m_pkthdr.len; 1633 mheader->m_pkthdr.len += m->m_pkthdr.len; 1634 m_cat(mheader, m); 1635 m = mheader; 1636 mheader = NULL; 1637 } else 1638 hbytes = 0; 1639 1640 /* 1641 * Add the buffer to the socket buffer chain. 1642 */ 1643 s = splnet(); 1644 retry_space: 1645 /* 1646 * Make sure that the socket is still able to take more data. 1647 * CANTSENDMORE being true usually means that the connection 1648 * was closed. so_error is true when an error was sensed after 1649 * a previous send. 1650 * The state is checked after the page mapping and buffer 1651 * allocation above since those operations may block and make 1652 * any socket checks stale. From this point forward, nothing 1653 * blocks before the pru_send (or more accurately, any blocking 1654 * results in a loop back to here to re-check). 1655 */ 1656 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1657 if (so->so_state & SS_CANTSENDMORE) { 1658 error = EPIPE; 1659 } else { 1660 error = so->so_error; 1661 so->so_error = 0; 1662 } 1663 m_freem(m); 1664 sbunlock(&so->so_snd); 1665 splx(s); 1666 goto done; 1667 } 1668 /* 1669 * Wait for socket space to become available. We do this just 1670 * after checking the connection state above in order to avoid 1671 * a race condition with sbwait(). 1672 */ 1673 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1674 if (so->so_state & SS_NBIO) { 1675 m_freem(m); 1676 sbunlock(&so->so_snd); 1677 splx(s); 1678 error = EAGAIN; 1679 goto done; 1680 } 1681 error = sbwait(&so->so_snd); 1682 /* 1683 * An error from sbwait usually indicates that we've 1684 * been interrupted by a signal. If we've sent anything 1685 * then return bytes sent, otherwise return the error. 1686 */ 1687 if (error) { 1688 m_freem(m); 1689 sbunlock(&so->so_snd); 1690 splx(s); 1691 goto done; 1692 } 1693 goto retry_space; 1694 } 1695 error = so_pru_send(so, 0, m, NULL, NULL, td); 1696 splx(s); 1697 if (error) { 1698 sbunlock(&so->so_snd); 1699 goto done; 1700 } 1701 } 1702 if (mheader != NULL) { 1703 *sbytes += mheader->m_pkthdr.len; 1704 error = so_pru_send(so, 0, mheader, NULL, NULL, td); 1705 mheader = NULL; 1706 } 1707 sbunlock(&so->so_snd); 1708 1709 done: 1710 if (fp) 1711 fdrop(fp, td); 1712 if (mheader != NULL) 1713 m_freem(mheader); 1714 return (error); 1715 } 1716