1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1988, 1990, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 67 * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.24 2003/11/11 17:18:18 silby Exp $ 68 */ 69 70 #include "opt_inet.h" 71 #include "opt_sctp.h" 72 73 #include <sys/param.h> 74 #include <sys/systm.h> 75 #include <sys/fcntl.h> 76 #include <sys/malloc.h> 77 #include <sys/mbuf.h> 78 #include <sys/domain.h> 79 #include <sys/file.h> /* for struct knote */ 80 #include <sys/kernel.h> 81 #include <sys/event.h> 82 #include <sys/proc.h> 83 #include <sys/protosw.h> 84 #include <sys/socket.h> 85 #include <sys/socketvar.h> 86 #include <sys/socketops.h> 87 #include <sys/resourcevar.h> 88 #include <sys/signalvar.h> 89 #include <sys/sysctl.h> 90 #include <sys/uio.h> 91 #include <sys/jail.h> 92 #include <vm/vm_zone.h> 93 #include <vm/pmap.h> 94 #include <net/netmsg2.h> 95 96 #include <sys/thread2.h> 97 #include <sys/socketvar2.h> 98 99 #include <machine/limits.h> 100 101 extern int tcp_sosend_agglim; 102 extern int tcp_sosend_async; 103 extern int udp_sosend_async; 104 105 #ifdef INET 106 static int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt); 107 #endif /* INET */ 108 109 static void filt_sordetach(struct knote *kn); 110 static int filt_soread(struct knote *kn, long hint); 111 static void filt_sowdetach(struct knote *kn); 112 static int filt_sowrite(struct knote *kn, long hint); 113 static int filt_solisten(struct knote *kn, long hint); 114 115 static void sodiscard(struct socket *so); 116 static int soclose_sync(struct socket *so, int fflag); 117 static void soclose_fast(struct socket *so); 118 119 static struct filterops solisten_filtops = 120 { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_solisten }; 121 static struct filterops soread_filtops = 122 { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_soread }; 123 static struct filterops sowrite_filtops = 124 { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sowdetach, filt_sowrite }; 125 static struct filterops soexcept_filtops = 126 { FILTEROP_ISFD|FILTEROP_MPSAFE, NULL, filt_sordetach, filt_soread }; 127 128 MALLOC_DEFINE(M_SOCKET, "socket", "socket struct"); 129 MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 130 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); 131 132 133 static int somaxconn = SOMAXCONN; 134 SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, 135 &somaxconn, 0, "Maximum pending socket connection queue size"); 136 137 static int use_soclose_fast = 1; 138 SYSCTL_INT(_kern_ipc, OID_AUTO, soclose_fast, CTLFLAG_RW, 139 &use_soclose_fast, 0, "Fast socket close"); 140 141 int use_soaccept_pred_fast = 1; 142 SYSCTL_INT(_kern_ipc, OID_AUTO, soaccept_pred_fast, CTLFLAG_RW, 143 &use_soaccept_pred_fast, 0, "Fast socket accept predication"); 144 145 int use_sendfile_async = 1; 146 SYSCTL_INT(_kern_ipc, OID_AUTO, sendfile_async, CTLFLAG_RW, 147 &use_sendfile_async, 0, "sendfile uses asynchronized pru_send"); 148 149 /* 150 * Socket operation routines. 151 * These routines are called by the routines in 152 * sys_socket.c or from a system process, and 153 * implement the semantics of socket operations by 154 * switching out to the protocol specific routines. 155 */ 156 157 /* 158 * Get a socket structure, and initialize it. 159 * Note that it would probably be better to allocate socket 160 * and PCB at the same time, but I'm not convinced that all 161 * the protocols can be easily modified to do this. 162 */ 163 struct socket * 164 soalloc(int waitok) 165 { 166 struct socket *so; 167 unsigned waitmask; 168 169 waitmask = waitok ? M_WAITOK : M_NOWAIT; 170 so = kmalloc(sizeof(struct socket), M_SOCKET, M_ZERO|waitmask); 171 if (so) { 172 /* XXX race condition for reentrant kernel */ 173 TAILQ_INIT(&so->so_aiojobq); 174 TAILQ_INIT(&so->so_rcv.ssb_kq.ki_mlist); 175 TAILQ_INIT(&so->so_snd.ssb_kq.ki_mlist); 176 lwkt_token_init(&so->so_rcv.ssb_token, "rcvtok"); 177 lwkt_token_init(&so->so_snd.ssb_token, "sndtok"); 178 so->so_state = SS_NOFDREF; 179 so->so_refs = 1; 180 } 181 return so; 182 } 183 184 int 185 socreate(int dom, struct socket **aso, int type, 186 int proto, struct thread *td) 187 { 188 struct proc *p = td->td_proc; 189 struct protosw *prp; 190 struct socket *so; 191 struct pru_attach_info ai; 192 int error; 193 194 if (proto) 195 prp = pffindproto(dom, proto, type); 196 else 197 prp = pffindtype(dom, type); 198 199 if (prp == NULL || prp->pr_usrreqs->pru_attach == 0) 200 return (EPROTONOSUPPORT); 201 202 if (p->p_ucred->cr_prison && jail_socket_unixiproute_only && 203 prp->pr_domain->dom_family != PF_LOCAL && 204 prp->pr_domain->dom_family != PF_INET && 205 prp->pr_domain->dom_family != PF_INET6 && 206 prp->pr_domain->dom_family != PF_ROUTE) { 207 return (EPROTONOSUPPORT); 208 } 209 210 if (prp->pr_type != type) 211 return (EPROTOTYPE); 212 so = soalloc(p != NULL); 213 if (so == NULL) 214 return (ENOBUFS); 215 216 /* 217 * Callers of socreate() presumably will connect up a descriptor 218 * and call soclose() if they cannot. This represents our so_refs 219 * (which should be 1) from soalloc(). 220 */ 221 soclrstate(so, SS_NOFDREF); 222 223 /* 224 * Set a default port for protocol processing. No action will occur 225 * on the socket on this port until an inpcb is attached to it and 226 * is able to match incoming packets, or until the socket becomes 227 * available to userland. 228 * 229 * We normally default the socket to the protocol thread on cpu 0. 230 * If PR_SYNC_PORT is set (unix domain sockets) there is no protocol 231 * thread and all pr_*()/pru_*() calls are executed synchronously. 232 */ 233 if (prp->pr_flags & PR_SYNC_PORT) 234 so->so_port = &netisr_sync_port; 235 else 236 so->so_port = cpu_portfn(0); 237 238 TAILQ_INIT(&so->so_incomp); 239 TAILQ_INIT(&so->so_comp); 240 so->so_type = type; 241 so->so_cred = crhold(p->p_ucred); 242 so->so_proto = prp; 243 ai.sb_rlimit = &p->p_rlimit[RLIMIT_SBSIZE]; 244 ai.p_ucred = p->p_ucred; 245 ai.fd_rdir = p->p_fd->fd_rdir; 246 247 /* 248 * Auto-sizing of socket buffers is managed by the protocols and 249 * the appropriate flags must be set in the pru_attach function. 250 */ 251 error = so_pru_attach(so, proto, &ai); 252 if (error) { 253 sosetstate(so, SS_NOFDREF); 254 sofree(so); /* from soalloc */ 255 return error; 256 } 257 258 /* 259 * NOTE: Returns referenced socket. 260 */ 261 *aso = so; 262 return (0); 263 } 264 265 int 266 sobind(struct socket *so, struct sockaddr *nam, struct thread *td) 267 { 268 int error; 269 270 error = so_pru_bind(so, nam, td); 271 return (error); 272 } 273 274 static void 275 sodealloc(struct socket *so) 276 { 277 if (so->so_rcv.ssb_hiwat) 278 (void)chgsbsize(so->so_cred->cr_uidinfo, 279 &so->so_rcv.ssb_hiwat, 0, RLIM_INFINITY); 280 if (so->so_snd.ssb_hiwat) 281 (void)chgsbsize(so->so_cred->cr_uidinfo, 282 &so->so_snd.ssb_hiwat, 0, RLIM_INFINITY); 283 #ifdef INET 284 /* remove accept filter if present */ 285 if (so->so_accf != NULL) 286 do_setopt_accept_filter(so, NULL); 287 #endif /* INET */ 288 crfree(so->so_cred); 289 if (so->so_faddr != NULL) 290 kfree(so->so_faddr, M_SONAME); 291 kfree(so, M_SOCKET); 292 } 293 294 int 295 solisten(struct socket *so, int backlog, struct thread *td) 296 { 297 int error; 298 #ifdef SCTP 299 short oldopt, oldqlimit; 300 #endif /* SCTP */ 301 302 if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) 303 return (EINVAL); 304 305 #ifdef SCTP 306 oldopt = so->so_options; 307 oldqlimit = so->so_qlimit; 308 #endif /* SCTP */ 309 310 lwkt_gettoken(&so->so_rcv.ssb_token); 311 if (TAILQ_EMPTY(&so->so_comp)) 312 so->so_options |= SO_ACCEPTCONN; 313 lwkt_reltoken(&so->so_rcv.ssb_token); 314 if (backlog < 0 || backlog > somaxconn) 315 backlog = somaxconn; 316 so->so_qlimit = backlog; 317 /* SCTP needs to look at tweak both the inbound backlog parameter AND 318 * the so_options (UDP model both connect's and gets inbound 319 * connections .. implicitly). 320 */ 321 error = so_pru_listen(so, td); 322 if (error) { 323 #ifdef SCTP 324 /* Restore the params */ 325 so->so_options = oldopt; 326 so->so_qlimit = oldqlimit; 327 #endif /* SCTP */ 328 return (error); 329 } 330 return (0); 331 } 332 333 /* 334 * Destroy a disconnected socket. This routine is a NOP if entities 335 * still have a reference on the socket: 336 * 337 * so_pcb - The protocol stack still has a reference 338 * SS_NOFDREF - There is no longer a file pointer reference 339 */ 340 void 341 sofree(struct socket *so) 342 { 343 struct socket *head; 344 345 /* 346 * This is a bit hackish at the moment. We need to interlock 347 * any accept queue we are on before we potentially lose the 348 * last reference to avoid races against a re-reference from 349 * someone operating on the queue. 350 */ 351 while ((head = so->so_head) != NULL) { 352 lwkt_getpooltoken(head); 353 if (so->so_head == head) 354 break; 355 lwkt_relpooltoken(head); 356 } 357 358 /* 359 * Arbitrage the last free. 360 */ 361 KKASSERT(so->so_refs > 0); 362 if (atomic_fetchadd_int(&so->so_refs, -1) != 1) { 363 if (head) 364 lwkt_relpooltoken(head); 365 return; 366 } 367 368 KKASSERT(so->so_pcb == NULL && (so->so_state & SS_NOFDREF)); 369 KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); 370 371 /* 372 * We're done, remove ourselves from the accept queue we are 373 * on, if we are on one. 374 */ 375 if (head != NULL) { 376 if (so->so_state & SS_INCOMP) { 377 TAILQ_REMOVE(&head->so_incomp, so, so_list); 378 head->so_incqlen--; 379 } else if (so->so_state & SS_COMP) { 380 /* 381 * We must not decommission a socket that's 382 * on the accept(2) queue. If we do, then 383 * accept(2) may hang after select(2) indicated 384 * that the listening socket was ready. 385 */ 386 lwkt_relpooltoken(head); 387 return; 388 } else { 389 panic("sofree: not queued"); 390 } 391 soclrstate(so, SS_INCOMP); 392 so->so_head = NULL; 393 lwkt_relpooltoken(head); 394 } 395 ssb_release(&so->so_snd, so); 396 sorflush(so); 397 sodealloc(so); 398 } 399 400 /* 401 * Close a socket on last file table reference removal. 402 * Initiate disconnect if connected. 403 * Free socket when disconnect complete. 404 */ 405 int 406 soclose(struct socket *so, int fflag) 407 { 408 int error; 409 410 funsetown(&so->so_sigio); 411 if (!use_soclose_fast || 412 (so->so_proto->pr_flags & PR_SYNC_PORT) || 413 (so->so_options & SO_LINGER)) { 414 error = soclose_sync(so, fflag); 415 } else { 416 soclose_fast(so); 417 error = 0; 418 } 419 return error; 420 } 421 422 static void 423 sodiscard(struct socket *so) 424 { 425 lwkt_getpooltoken(so); 426 if (so->so_options & SO_ACCEPTCONN) { 427 struct socket *sp; 428 429 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { 430 TAILQ_REMOVE(&so->so_incomp, sp, so_list); 431 soclrstate(sp, SS_INCOMP); 432 sp->so_head = NULL; 433 so->so_incqlen--; 434 soaborta(sp); 435 } 436 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { 437 TAILQ_REMOVE(&so->so_comp, sp, so_list); 438 soclrstate(sp, SS_COMP); 439 sp->so_head = NULL; 440 so->so_qlen--; 441 soaborta(sp); 442 } 443 } 444 lwkt_relpooltoken(so); 445 446 if (so->so_state & SS_NOFDREF) 447 panic("soclose: NOFDREF"); 448 sosetstate(so, SS_NOFDREF); /* take ref */ 449 } 450 451 static int 452 soclose_sync(struct socket *so, int fflag) 453 { 454 int error = 0; 455 456 if (so->so_pcb == NULL) 457 goto discard; 458 if (so->so_state & SS_ISCONNECTED) { 459 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 460 error = sodisconnect(so); 461 if (error) 462 goto drop; 463 } 464 if (so->so_options & SO_LINGER) { 465 if ((so->so_state & SS_ISDISCONNECTING) && 466 (fflag & FNONBLOCK)) 467 goto drop; 468 while (so->so_state & SS_ISCONNECTED) { 469 error = tsleep(&so->so_timeo, PCATCH, 470 "soclos", so->so_linger * hz); 471 if (error) 472 break; 473 } 474 } 475 } 476 drop: 477 if (so->so_pcb) { 478 int error2; 479 480 error2 = so_pru_detach(so); 481 if (error == 0) 482 error = error2; 483 } 484 discard: 485 sodiscard(so); 486 so_pru_sync(so); /* unpend async sending */ 487 sofree(so); /* dispose of ref */ 488 489 return (error); 490 } 491 492 static void 493 soclose_sofree_async_handler(netmsg_t msg) 494 { 495 sofree(msg->base.nm_so); 496 } 497 498 static void 499 soclose_sofree_async(struct socket *so) 500 { 501 struct netmsg_base *base = &so->so_clomsg; 502 503 netmsg_init(base, so, &netisr_apanic_rport, 0, 504 soclose_sofree_async_handler); 505 lwkt_sendmsg(so->so_port, &base->lmsg); 506 } 507 508 static void 509 soclose_disconn_async_handler(netmsg_t msg) 510 { 511 struct socket *so = msg->base.nm_so; 512 513 if ((so->so_state & SS_ISCONNECTED) && 514 (so->so_state & SS_ISDISCONNECTING) == 0) 515 so_pru_disconnect_direct(so); 516 517 if (so->so_pcb) 518 so_pru_detach_direct(so); 519 520 sodiscard(so); 521 sofree(so); 522 } 523 524 static void 525 soclose_disconn_async(struct socket *so) 526 { 527 struct netmsg_base *base = &so->so_clomsg; 528 529 netmsg_init(base, so, &netisr_apanic_rport, 0, 530 soclose_disconn_async_handler); 531 lwkt_sendmsg(so->so_port, &base->lmsg); 532 } 533 534 static void 535 soclose_detach_async_handler(netmsg_t msg) 536 { 537 struct socket *so = msg->base.nm_so; 538 539 if (so->so_pcb) 540 so_pru_detach_direct(so); 541 542 sodiscard(so); 543 sofree(so); 544 } 545 546 static void 547 soclose_detach_async(struct socket *so) 548 { 549 struct netmsg_base *base = &so->so_clomsg; 550 551 netmsg_init(base, so, &netisr_apanic_rport, 0, 552 soclose_detach_async_handler); 553 lwkt_sendmsg(so->so_port, &base->lmsg); 554 } 555 556 static void 557 soclose_fast(struct socket *so) 558 { 559 if (so->so_pcb == NULL) 560 goto discard; 561 562 if ((so->so_state & SS_ISCONNECTED) && 563 (so->so_state & SS_ISDISCONNECTING) == 0) { 564 soclose_disconn_async(so); 565 return; 566 } 567 568 if (so->so_pcb) { 569 soclose_detach_async(so); 570 return; 571 } 572 573 discard: 574 sodiscard(so); 575 soclose_sofree_async(so); 576 } 577 578 /* 579 * Abort and destroy a socket. Only one abort can be in progress 580 * at any given moment. 581 */ 582 void 583 soabort(struct socket *so) 584 { 585 soreference(so); 586 so_pru_abort(so); 587 } 588 589 void 590 soaborta(struct socket *so) 591 { 592 soreference(so); 593 so_pru_aborta(so); 594 } 595 596 void 597 soabort_oncpu(struct socket *so) 598 { 599 soreference(so); 600 so_pru_abort_oncpu(so); 601 } 602 603 /* 604 * so is passed in ref'd, which becomes owned by 605 * the cleared SS_NOFDREF flag. 606 */ 607 void 608 soaccept_generic(struct socket *so) 609 { 610 if ((so->so_state & SS_NOFDREF) == 0) 611 panic("soaccept: !NOFDREF"); 612 soclrstate(so, SS_NOFDREF); /* owned by lack of SS_NOFDREF */ 613 } 614 615 int 616 soaccept(struct socket *so, struct sockaddr **nam) 617 { 618 int error; 619 620 soaccept_generic(so); 621 error = so_pru_accept(so, nam); 622 return (error); 623 } 624 625 int 626 soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) 627 { 628 int error; 629 630 if (so->so_options & SO_ACCEPTCONN) 631 return (EOPNOTSUPP); 632 /* 633 * If protocol is connection-based, can only connect once. 634 * Otherwise, if connected, try to disconnect first. 635 * This allows user to disconnect by connecting to, e.g., 636 * a null address. 637 */ 638 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 639 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 640 (error = sodisconnect(so)))) { 641 error = EISCONN; 642 } else { 643 /* 644 * Prevent accumulated error from previous connection 645 * from biting us. 646 */ 647 so->so_error = 0; 648 error = so_pru_connect(so, nam, td); 649 } 650 return (error); 651 } 652 653 int 654 soconnect2(struct socket *so1, struct socket *so2) 655 { 656 int error; 657 658 error = so_pru_connect2(so1, so2); 659 return (error); 660 } 661 662 int 663 sodisconnect(struct socket *so) 664 { 665 int error; 666 667 if ((so->so_state & SS_ISCONNECTED) == 0) { 668 error = ENOTCONN; 669 goto bad; 670 } 671 if (so->so_state & SS_ISDISCONNECTING) { 672 error = EALREADY; 673 goto bad; 674 } 675 error = so_pru_disconnect(so); 676 bad: 677 return (error); 678 } 679 680 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) 681 /* 682 * Send on a socket. 683 * If send must go all at once and message is larger than 684 * send buffering, then hard error. 685 * Lock against other senders. 686 * If must go all at once and not enough room now, then 687 * inform user that this would block and do nothing. 688 * Otherwise, if nonblocking, send as much as possible. 689 * The data to be sent is described by "uio" if nonzero, 690 * otherwise by the mbuf chain "top" (which must be null 691 * if uio is not). Data provided in mbuf chain must be small 692 * enough to send all at once. 693 * 694 * Returns nonzero on error, timeout or signal; callers 695 * must check for short counts if EINTR/ERESTART are returned. 696 * Data and control buffers are freed on return. 697 */ 698 int 699 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, 700 struct mbuf *top, struct mbuf *control, int flags, 701 struct thread *td) 702 { 703 struct mbuf **mp; 704 struct mbuf *m; 705 size_t resid; 706 int space, len; 707 int clen = 0, error, dontroute, mlen; 708 int atomic = sosendallatonce(so) || top; 709 int pru_flags; 710 711 if (uio) { 712 resid = uio->uio_resid; 713 } else { 714 resid = (size_t)top->m_pkthdr.len; 715 #ifdef INVARIANTS 716 len = 0; 717 for (m = top; m; m = m->m_next) 718 len += m->m_len; 719 KKASSERT(top->m_pkthdr.len == len); 720 #endif 721 } 722 723 /* 724 * WARNING! resid is unsigned, space and len are signed. space 725 * can wind up negative if the sockbuf is overcommitted. 726 * 727 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 728 * type sockets since that's an error. 729 */ 730 if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { 731 error = EINVAL; 732 goto out; 733 } 734 735 dontroute = 736 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 737 (so->so_proto->pr_flags & PR_ATOMIC); 738 if (td->td_lwp != NULL) 739 td->td_lwp->lwp_ru.ru_msgsnd++; 740 if (control) 741 clen = control->m_len; 742 #define gotoerr(errcode) { error = errcode; goto release; } 743 744 restart: 745 error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags)); 746 if (error) 747 goto out; 748 749 do { 750 if (so->so_state & SS_CANTSENDMORE) 751 gotoerr(EPIPE); 752 if (so->so_error) { 753 error = so->so_error; 754 so->so_error = 0; 755 goto release; 756 } 757 if ((so->so_state & SS_ISCONNECTED) == 0) { 758 /* 759 * `sendto' and `sendmsg' is allowed on a connection- 760 * based socket if it supports implied connect. 761 * Return ENOTCONN if not connected and no address is 762 * supplied. 763 */ 764 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 765 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 766 if ((so->so_state & SS_ISCONFIRMING) == 0 && 767 !(resid == 0 && clen != 0)) 768 gotoerr(ENOTCONN); 769 } else if (addr == NULL) 770 gotoerr(so->so_proto->pr_flags & PR_CONNREQUIRED ? 771 ENOTCONN : EDESTADDRREQ); 772 } 773 if ((atomic && resid > so->so_snd.ssb_hiwat) || 774 clen > so->so_snd.ssb_hiwat) { 775 gotoerr(EMSGSIZE); 776 } 777 space = ssb_space(&so->so_snd); 778 if (flags & MSG_OOB) 779 space += 1024; 780 if ((space < 0 || (size_t)space < resid + clen) && uio && 781 (atomic || space < so->so_snd.ssb_lowat || space < clen)) { 782 if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) 783 gotoerr(EWOULDBLOCK); 784 ssb_unlock(&so->so_snd); 785 error = ssb_wait(&so->so_snd); 786 if (error) 787 goto out; 788 goto restart; 789 } 790 mp = ⊤ 791 space -= clen; 792 do { 793 if (uio == NULL) { 794 /* 795 * Data is prepackaged in "top". 796 */ 797 resid = 0; 798 if (flags & MSG_EOR) 799 top->m_flags |= M_EOR; 800 } else do { 801 if (resid > INT_MAX) 802 resid = INT_MAX; 803 m = m_getl((int)resid, MB_WAIT, MT_DATA, 804 top == NULL ? M_PKTHDR : 0, &mlen); 805 if (top == NULL) { 806 m->m_pkthdr.len = 0; 807 m->m_pkthdr.rcvif = NULL; 808 } 809 len = imin((int)szmin(mlen, resid), space); 810 if (resid < MINCLSIZE) { 811 /* 812 * For datagram protocols, leave room 813 * for protocol headers in first mbuf. 814 */ 815 if (atomic && top == NULL && len < mlen) 816 MH_ALIGN(m, len); 817 } 818 space -= len; 819 error = uiomove(mtod(m, caddr_t), (size_t)len, uio); 820 resid = uio->uio_resid; 821 m->m_len = len; 822 *mp = m; 823 top->m_pkthdr.len += len; 824 if (error) 825 goto release; 826 mp = &m->m_next; 827 if (resid == 0) { 828 if (flags & MSG_EOR) 829 top->m_flags |= M_EOR; 830 break; 831 } 832 } while (space > 0 && atomic); 833 if (dontroute) 834 so->so_options |= SO_DONTROUTE; 835 if (flags & MSG_OOB) { 836 pru_flags = PRUS_OOB; 837 } else if ((flags & MSG_EOF) && 838 (so->so_proto->pr_flags & PR_IMPLOPCL) && 839 (resid == 0)) { 840 /* 841 * If the user set MSG_EOF, the protocol 842 * understands this flag and nothing left to 843 * send then use PRU_SEND_EOF instead of PRU_SEND. 844 */ 845 pru_flags = PRUS_EOF; 846 } else if (resid > 0 && space > 0) { 847 /* If there is more to send, set PRUS_MORETOCOME */ 848 pru_flags = PRUS_MORETOCOME; 849 } else { 850 pru_flags = 0; 851 } 852 /* 853 * XXX all the SS_CANTSENDMORE checks previously 854 * done could be out of date. We could have recieved 855 * a reset packet in an interrupt or maybe we slept 856 * while doing page faults in uiomove() etc. We could 857 * probably recheck again inside the splnet() protection 858 * here, but there are probably other places that this 859 * also happens. We must rethink this. 860 */ 861 error = so_pru_send(so, pru_flags, top, addr, control, td); 862 if (dontroute) 863 so->so_options &= ~SO_DONTROUTE; 864 clen = 0; 865 control = NULL; 866 top = NULL; 867 mp = ⊤ 868 if (error) 869 goto release; 870 } while (resid && space > 0); 871 } while (resid); 872 873 release: 874 ssb_unlock(&so->so_snd); 875 out: 876 if (top) 877 m_freem(top); 878 if (control) 879 m_freem(control); 880 return (error); 881 } 882 883 /* 884 * A specialization of sosend() for UDP based on protocol-specific knowledge: 885 * so->so_proto->pr_flags has the PR_ATOMIC field set. This means that 886 * sosendallatonce() returns true, 887 * the "atomic" variable is true, 888 * and sosendudp() blocks until space is available for the entire send. 889 * so->so_proto->pr_flags does not have the PR_CONNREQUIRED or 890 * PR_IMPLOPCL flags set. 891 * UDP has no out-of-band data. 892 * UDP has no control data. 893 * UDP does not support MSG_EOR. 894 */ 895 int 896 sosendudp(struct socket *so, struct sockaddr *addr, struct uio *uio, 897 struct mbuf *top, struct mbuf *control, int flags, struct thread *td) 898 { 899 size_t resid; 900 int error, pru_flags = 0; 901 int space; 902 903 if (td->td_lwp != NULL) 904 td->td_lwp->lwp_ru.ru_msgsnd++; 905 if (control) 906 m_freem(control); 907 908 KASSERT((uio && !top) || (top && !uio), ("bad arguments to sosendudp")); 909 resid = uio ? uio->uio_resid : (size_t)top->m_pkthdr.len; 910 911 restart: 912 error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags)); 913 if (error) 914 goto out; 915 916 if (so->so_state & SS_CANTSENDMORE) 917 gotoerr(EPIPE); 918 if (so->so_error) { 919 error = so->so_error; 920 so->so_error = 0; 921 goto release; 922 } 923 if (!(so->so_state & SS_ISCONNECTED) && addr == NULL) 924 gotoerr(EDESTADDRREQ); 925 if (resid > so->so_snd.ssb_hiwat) 926 gotoerr(EMSGSIZE); 927 space = ssb_space(&so->so_snd); 928 if (uio && (space < 0 || (size_t)space < resid)) { 929 if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) 930 gotoerr(EWOULDBLOCK); 931 ssb_unlock(&so->so_snd); 932 error = ssb_wait(&so->so_snd); 933 if (error) 934 goto out; 935 goto restart; 936 } 937 938 if (uio) { 939 top = m_uiomove(uio); 940 if (top == NULL) 941 goto release; 942 } 943 944 if (flags & MSG_DONTROUTE) 945 pru_flags |= PRUS_DONTROUTE; 946 947 if (udp_sosend_async && (flags & MSG_SYNC) == 0) { 948 so_pru_send_async(so, pru_flags, top, addr, NULL, td); 949 error = 0; 950 } else { 951 error = so_pru_send(so, pru_flags, top, addr, NULL, td); 952 } 953 top = NULL; /* sent or freed in lower layer */ 954 955 release: 956 ssb_unlock(&so->so_snd); 957 out: 958 if (top) 959 m_freem(top); 960 return (error); 961 } 962 963 int 964 sosendtcp(struct socket *so, struct sockaddr *addr, struct uio *uio, 965 struct mbuf *top, struct mbuf *control, int flags, 966 struct thread *td) 967 { 968 struct mbuf **mp; 969 struct mbuf *m; 970 size_t resid; 971 int space, len; 972 int error, mlen; 973 int allatonce; 974 int pru_flags; 975 976 if (uio) { 977 KKASSERT(top == NULL); 978 allatonce = 0; 979 resid = uio->uio_resid; 980 } else { 981 allatonce = 1; 982 resid = (size_t)top->m_pkthdr.len; 983 #ifdef INVARIANTS 984 len = 0; 985 for (m = top; m; m = m->m_next) 986 len += m->m_len; 987 KKASSERT(top->m_pkthdr.len == len); 988 #endif 989 } 990 991 /* 992 * WARNING! resid is unsigned, space and len are signed. space 993 * can wind up negative if the sockbuf is overcommitted. 994 * 995 * Also check to make sure that MSG_EOR isn't used on TCP 996 */ 997 if (flags & MSG_EOR) { 998 error = EINVAL; 999 goto out; 1000 } 1001 1002 if (control) { 1003 /* TCP doesn't do control messages (rights, creds, etc) */ 1004 if (control->m_len) { 1005 error = EINVAL; 1006 goto out; 1007 } 1008 m_freem(control); /* empty control, just free it */ 1009 control = NULL; 1010 } 1011 1012 if (td->td_lwp != NULL) 1013 td->td_lwp->lwp_ru.ru_msgsnd++; 1014 1015 #define gotoerr(errcode) { error = errcode; goto release; } 1016 1017 restart: 1018 error = ssb_lock(&so->so_snd, SBLOCKWAIT(flags)); 1019 if (error) 1020 goto out; 1021 1022 do { 1023 if (so->so_state & SS_CANTSENDMORE) 1024 gotoerr(EPIPE); 1025 if (so->so_error) { 1026 error = so->so_error; 1027 so->so_error = 0; 1028 goto release; 1029 } 1030 if ((so->so_state & SS_ISCONNECTED) == 0 && 1031 (so->so_state & SS_ISCONFIRMING) == 0) 1032 gotoerr(ENOTCONN); 1033 if (allatonce && resid > so->so_snd.ssb_hiwat) 1034 gotoerr(EMSGSIZE); 1035 1036 space = ssb_space_prealloc(&so->so_snd); 1037 if (flags & MSG_OOB) 1038 space += 1024; 1039 if ((space < 0 || (size_t)space < resid) && !allatonce && 1040 space < so->so_snd.ssb_lowat) { 1041 if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) 1042 gotoerr(EWOULDBLOCK); 1043 ssb_unlock(&so->so_snd); 1044 error = ssb_wait(&so->so_snd); 1045 if (error) 1046 goto out; 1047 goto restart; 1048 } 1049 mp = ⊤ 1050 do { 1051 int cnt = 0, async = 0; 1052 1053 if (uio == NULL) { 1054 /* 1055 * Data is prepackaged in "top". 1056 */ 1057 resid = 0; 1058 } else do { 1059 if (resid > INT_MAX) 1060 resid = INT_MAX; 1061 m = m_getl((int)resid, MB_WAIT, MT_DATA, 1062 top == NULL ? M_PKTHDR : 0, &mlen); 1063 if (top == NULL) { 1064 m->m_pkthdr.len = 0; 1065 m->m_pkthdr.rcvif = NULL; 1066 } 1067 len = imin((int)szmin(mlen, resid), space); 1068 space -= len; 1069 error = uiomove(mtod(m, caddr_t), (size_t)len, uio); 1070 resid = uio->uio_resid; 1071 m->m_len = len; 1072 *mp = m; 1073 top->m_pkthdr.len += len; 1074 if (error) 1075 goto release; 1076 mp = &m->m_next; 1077 if (resid == 0) 1078 break; 1079 ++cnt; 1080 } while (space > 0 && cnt < tcp_sosend_agglim); 1081 1082 if (tcp_sosend_async) 1083 async = 1; 1084 1085 if (flags & MSG_OOB) { 1086 pru_flags = PRUS_OOB; 1087 async = 0; 1088 } else if ((flags & MSG_EOF) && resid == 0) { 1089 pru_flags = PRUS_EOF; 1090 } else if (resid > 0 && space > 0) { 1091 /* If there is more to send, set PRUS_MORETOCOME */ 1092 pru_flags = PRUS_MORETOCOME; 1093 async = 1; 1094 } else { 1095 pru_flags = 0; 1096 } 1097 1098 if (flags & MSG_SYNC) 1099 async = 0; 1100 1101 /* 1102 * XXX all the SS_CANTSENDMORE checks previously 1103 * done could be out of date. We could have recieved 1104 * a reset packet in an interrupt or maybe we slept 1105 * while doing page faults in uiomove() etc. We could 1106 * probably recheck again inside the splnet() protection 1107 * here, but there are probably other places that this 1108 * also happens. We must rethink this. 1109 */ 1110 for (m = top; m; m = m->m_next) 1111 ssb_preallocstream(&so->so_snd, m); 1112 if (!async) { 1113 error = so_pru_send(so, pru_flags, top, 1114 NULL, NULL, td); 1115 } else { 1116 so_pru_send_async(so, pru_flags, top, 1117 NULL, NULL, td); 1118 error = 0; 1119 } 1120 1121 top = NULL; 1122 mp = ⊤ 1123 if (error) 1124 goto release; 1125 } while (resid && space > 0); 1126 } while (resid); 1127 1128 release: 1129 ssb_unlock(&so->so_snd); 1130 out: 1131 if (top) 1132 m_freem(top); 1133 if (control) 1134 m_freem(control); 1135 return (error); 1136 } 1137 1138 /* 1139 * Implement receive operations on a socket. 1140 * 1141 * We depend on the way that records are added to the signalsockbuf 1142 * by sbappend*. In particular, each record (mbufs linked through m_next) 1143 * must begin with an address if the protocol so specifies, 1144 * followed by an optional mbuf or mbufs containing ancillary data, 1145 * and then zero or more mbufs of data. 1146 * 1147 * Although the signalsockbuf is locked, new data may still be appended. 1148 * A token inside the ssb_lock deals with MP issues and still allows 1149 * the network to access the socket if we block in a uio. 1150 * 1151 * The caller may receive the data as a single mbuf chain by supplying 1152 * an mbuf **mp0 for use in returning the chain. The uio is then used 1153 * only for the count in uio_resid. 1154 */ 1155 int 1156 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, 1157 struct sockbuf *sio, struct mbuf **controlp, int *flagsp) 1158 { 1159 struct mbuf *m, *n; 1160 struct mbuf *free_chain = NULL; 1161 int flags, len, error, offset; 1162 struct protosw *pr = so->so_proto; 1163 int moff, type = 0; 1164 size_t resid, orig_resid; 1165 1166 if (uio) 1167 resid = uio->uio_resid; 1168 else 1169 resid = (size_t)(sio->sb_climit - sio->sb_cc); 1170 orig_resid = resid; 1171 1172 if (psa) 1173 *psa = NULL; 1174 if (controlp) 1175 *controlp = NULL; 1176 if (flagsp) 1177 flags = *flagsp &~ MSG_EOR; 1178 else 1179 flags = 0; 1180 if (flags & MSG_OOB) { 1181 m = m_get(MB_WAIT, MT_DATA); 1182 if (m == NULL) 1183 return (ENOBUFS); 1184 error = so_pru_rcvoob(so, m, flags & MSG_PEEK); 1185 if (error) 1186 goto bad; 1187 if (sio) { 1188 do { 1189 sbappend(sio, m); 1190 KKASSERT(resid >= (size_t)m->m_len); 1191 resid -= (size_t)m->m_len; 1192 } while (resid > 0 && m); 1193 } else { 1194 do { 1195 uio->uio_resid = resid; 1196 error = uiomove(mtod(m, caddr_t), 1197 (int)szmin(resid, m->m_len), 1198 uio); 1199 resid = uio->uio_resid; 1200 m = m_free(m); 1201 } while (uio->uio_resid && error == 0 && m); 1202 } 1203 bad: 1204 if (m) 1205 m_freem(m); 1206 return (error); 1207 } 1208 if ((so->so_state & SS_ISCONFIRMING) && resid) 1209 so_pru_rcvd(so, 0); 1210 1211 /* 1212 * The token interlocks against the protocol thread while 1213 * ssb_lock is a blocking lock against other userland entities. 1214 */ 1215 lwkt_gettoken(&so->so_rcv.ssb_token); 1216 restart: 1217 error = ssb_lock(&so->so_rcv, SBLOCKWAIT(flags)); 1218 if (error) 1219 goto done; 1220 1221 m = so->so_rcv.ssb_mb; 1222 /* 1223 * If we have less data than requested, block awaiting more 1224 * (subject to any timeout) if: 1225 * 1. the current count is less than the low water mark, or 1226 * 2. MSG_WAITALL is set, and it is possible to do the entire 1227 * receive operation at once if we block (resid <= hiwat). 1228 * 3. MSG_DONTWAIT is not set 1229 * If MSG_WAITALL is set but resid is larger than the receive buffer, 1230 * we have to do the receive in sections, and thus risk returning 1231 * a short count if a timeout or signal occurs after we start. 1232 */ 1233 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && 1234 (size_t)so->so_rcv.ssb_cc < resid) && 1235 (so->so_rcv.ssb_cc < so->so_rcv.ssb_lowat || 1236 ((flags & MSG_WAITALL) && resid <= (size_t)so->so_rcv.ssb_hiwat)) && 1237 m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { 1238 KASSERT(m != NULL || !so->so_rcv.ssb_cc, ("receive 1")); 1239 if (so->so_error) { 1240 if (m) 1241 goto dontblock; 1242 error = so->so_error; 1243 if ((flags & MSG_PEEK) == 0) 1244 so->so_error = 0; 1245 goto release; 1246 } 1247 if (so->so_state & SS_CANTRCVMORE) { 1248 if (m) 1249 goto dontblock; 1250 else 1251 goto release; 1252 } 1253 for (; m; m = m->m_next) { 1254 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 1255 m = so->so_rcv.ssb_mb; 1256 goto dontblock; 1257 } 1258 } 1259 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 1260 (pr->pr_flags & PR_CONNREQUIRED)) { 1261 error = ENOTCONN; 1262 goto release; 1263 } 1264 if (resid == 0) 1265 goto release; 1266 if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT)) { 1267 error = EWOULDBLOCK; 1268 goto release; 1269 } 1270 ssb_unlock(&so->so_rcv); 1271 error = ssb_wait(&so->so_rcv); 1272 if (error) 1273 goto done; 1274 goto restart; 1275 } 1276 dontblock: 1277 if (uio && uio->uio_td && uio->uio_td->td_proc) 1278 uio->uio_td->td_lwp->lwp_ru.ru_msgrcv++; 1279 1280 /* 1281 * note: m should be == sb_mb here. Cache the next record while 1282 * cleaning up. Note that calling m_free*() will break out critical 1283 * section. 1284 */ 1285 KKASSERT(m == so->so_rcv.ssb_mb); 1286 1287 /* 1288 * Skip any address mbufs prepending the record. 1289 */ 1290 if (pr->pr_flags & PR_ADDR) { 1291 KASSERT(m->m_type == MT_SONAME, ("receive 1a")); 1292 orig_resid = 0; 1293 if (psa) 1294 *psa = dup_sockaddr(mtod(m, struct sockaddr *)); 1295 if (flags & MSG_PEEK) 1296 m = m->m_next; 1297 else 1298 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 1299 } 1300 1301 /* 1302 * Skip any control mbufs prepending the record. 1303 */ 1304 #ifdef SCTP 1305 if (pr->pr_flags & PR_ADDR_OPT) { 1306 /* 1307 * For SCTP we may be getting a 1308 * whole message OR a partial delivery. 1309 */ 1310 if (m && m->m_type == MT_SONAME) { 1311 orig_resid = 0; 1312 if (psa) 1313 *psa = dup_sockaddr(mtod(m, struct sockaddr *)); 1314 if (flags & MSG_PEEK) 1315 m = m->m_next; 1316 else 1317 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 1318 } 1319 } 1320 #endif /* SCTP */ 1321 while (m && m->m_type == MT_CONTROL && error == 0) { 1322 if (flags & MSG_PEEK) { 1323 if (controlp) 1324 *controlp = m_copy(m, 0, m->m_len); 1325 m = m->m_next; /* XXX race */ 1326 } else { 1327 if (controlp) { 1328 n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL); 1329 if (pr->pr_domain->dom_externalize && 1330 mtod(m, struct cmsghdr *)->cmsg_type == 1331 SCM_RIGHTS) 1332 error = (*pr->pr_domain->dom_externalize)(m); 1333 *controlp = m; 1334 m = n; 1335 } else { 1336 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 1337 } 1338 } 1339 if (controlp && *controlp) { 1340 orig_resid = 0; 1341 controlp = &(*controlp)->m_next; 1342 } 1343 } 1344 1345 /* 1346 * flag OOB data. 1347 */ 1348 if (m) { 1349 type = m->m_type; 1350 if (type == MT_OOBDATA) 1351 flags |= MSG_OOB; 1352 } 1353 1354 /* 1355 * Copy to the UIO or mbuf return chain (*mp). 1356 */ 1357 moff = 0; 1358 offset = 0; 1359 while (m && resid > 0 && error == 0) { 1360 if (m->m_type == MT_OOBDATA) { 1361 if (type != MT_OOBDATA) 1362 break; 1363 } else if (type == MT_OOBDATA) 1364 break; 1365 else 1366 KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, 1367 ("receive 3")); 1368 soclrstate(so, SS_RCVATMARK); 1369 len = (resid > INT_MAX) ? INT_MAX : resid; 1370 if (so->so_oobmark && len > so->so_oobmark - offset) 1371 len = so->so_oobmark - offset; 1372 if (len > m->m_len - moff) 1373 len = m->m_len - moff; 1374 1375 /* 1376 * Copy out to the UIO or pass the mbufs back to the SIO. 1377 * The SIO is dealt with when we eat the mbuf, but deal 1378 * with the resid here either way. 1379 */ 1380 if (uio) { 1381 uio->uio_resid = resid; 1382 error = uiomove(mtod(m, caddr_t) + moff, len, uio); 1383 resid = uio->uio_resid; 1384 if (error) 1385 goto release; 1386 } else { 1387 resid -= (size_t)len; 1388 } 1389 1390 /* 1391 * Eat the entire mbuf or just a piece of it 1392 */ 1393 if (len == m->m_len - moff) { 1394 if (m->m_flags & M_EOR) 1395 flags |= MSG_EOR; 1396 #ifdef SCTP 1397 if (m->m_flags & M_NOTIFICATION) 1398 flags |= MSG_NOTIFICATION; 1399 #endif /* SCTP */ 1400 if (flags & MSG_PEEK) { 1401 m = m->m_next; 1402 moff = 0; 1403 } else { 1404 if (sio) { 1405 n = sbunlinkmbuf(&so->so_rcv.sb, m, NULL); 1406 sbappend(sio, m); 1407 m = n; 1408 } else { 1409 m = sbunlinkmbuf(&so->so_rcv.sb, m, &free_chain); 1410 } 1411 } 1412 } else { 1413 if (flags & MSG_PEEK) { 1414 moff += len; 1415 } else { 1416 if (sio) { 1417 n = m_copym(m, 0, len, MB_WAIT); 1418 if (n) 1419 sbappend(sio, n); 1420 } 1421 m->m_data += len; 1422 m->m_len -= len; 1423 so->so_rcv.ssb_cc -= len; 1424 } 1425 } 1426 if (so->so_oobmark) { 1427 if ((flags & MSG_PEEK) == 0) { 1428 so->so_oobmark -= len; 1429 if (so->so_oobmark == 0) { 1430 sosetstate(so, SS_RCVATMARK); 1431 break; 1432 } 1433 } else { 1434 offset += len; 1435 if (offset == so->so_oobmark) 1436 break; 1437 } 1438 } 1439 if (flags & MSG_EOR) 1440 break; 1441 /* 1442 * If the MSG_WAITALL flag is set (for non-atomic socket), 1443 * we must not quit until resid == 0 or an error 1444 * termination. If a signal/timeout occurs, return 1445 * with a short count but without error. 1446 * Keep signalsockbuf locked against other readers. 1447 */ 1448 while ((flags & MSG_WAITALL) && m == NULL && 1449 resid > 0 && !sosendallatonce(so) && 1450 so->so_rcv.ssb_mb == NULL) { 1451 if (so->so_error || so->so_state & SS_CANTRCVMORE) 1452 break; 1453 /* 1454 * The window might have closed to zero, make 1455 * sure we send an ack now that we've drained 1456 * the buffer or we might end up blocking until 1457 * the idle takes over (5 seconds). 1458 */ 1459 if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) 1460 so_pru_rcvd(so, flags); 1461 error = ssb_wait(&so->so_rcv); 1462 if (error) { 1463 ssb_unlock(&so->so_rcv); 1464 error = 0; 1465 goto done; 1466 } 1467 m = so->so_rcv.ssb_mb; 1468 } 1469 } 1470 1471 /* 1472 * If an atomic read was requested but unread data still remains 1473 * in the record, set MSG_TRUNC. 1474 */ 1475 if (m && pr->pr_flags & PR_ATOMIC) 1476 flags |= MSG_TRUNC; 1477 1478 /* 1479 * Cleanup. If an atomic read was requested drop any unread data. 1480 */ 1481 if ((flags & MSG_PEEK) == 0) { 1482 if (m && (pr->pr_flags & PR_ATOMIC)) 1483 sbdroprecord(&so->so_rcv.sb); 1484 if ((pr->pr_flags & PR_WANTRCVD) && so->so_pcb) 1485 so_pru_rcvd(so, flags); 1486 } 1487 1488 if (orig_resid == resid && orig_resid && 1489 (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { 1490 ssb_unlock(&so->so_rcv); 1491 goto restart; 1492 } 1493 1494 if (flagsp) 1495 *flagsp |= flags; 1496 release: 1497 ssb_unlock(&so->so_rcv); 1498 done: 1499 lwkt_reltoken(&so->so_rcv.ssb_token); 1500 if (free_chain) 1501 m_freem(free_chain); 1502 return (error); 1503 } 1504 1505 /* 1506 * Shut a socket down. Note that we do not get a frontend lock as we 1507 * want to be able to shut the socket down even if another thread is 1508 * blocked in a read(), thus waking it up. 1509 */ 1510 int 1511 soshutdown(struct socket *so, int how) 1512 { 1513 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 1514 return (EINVAL); 1515 1516 if (how != SHUT_WR) { 1517 /*ssb_lock(&so->so_rcv, M_WAITOK);*/ 1518 sorflush(so); 1519 /*ssb_unlock(&so->so_rcv);*/ 1520 } 1521 if (how != SHUT_RD) 1522 return (so_pru_shutdown(so)); 1523 return (0); 1524 } 1525 1526 void 1527 sorflush(struct socket *so) 1528 { 1529 struct signalsockbuf *ssb = &so->so_rcv; 1530 struct protosw *pr = so->so_proto; 1531 struct signalsockbuf asb; 1532 1533 atomic_set_int(&ssb->ssb_flags, SSB_NOINTR); 1534 1535 lwkt_gettoken(&ssb->ssb_token); 1536 socantrcvmore(so); 1537 asb = *ssb; 1538 1539 /* 1540 * Can't just blow up the ssb structure here 1541 */ 1542 bzero(&ssb->sb, sizeof(ssb->sb)); 1543 ssb->ssb_timeo = 0; 1544 ssb->ssb_lowat = 0; 1545 ssb->ssb_hiwat = 0; 1546 ssb->ssb_mbmax = 0; 1547 atomic_clear_int(&ssb->ssb_flags, SSB_CLEAR_MASK); 1548 1549 if ((pr->pr_flags & PR_RIGHTS) && pr->pr_domain->dom_dispose) 1550 (*pr->pr_domain->dom_dispose)(asb.ssb_mb); 1551 ssb_release(&asb, so); 1552 1553 lwkt_reltoken(&ssb->ssb_token); 1554 } 1555 1556 #ifdef INET 1557 static int 1558 do_setopt_accept_filter(struct socket *so, struct sockopt *sopt) 1559 { 1560 struct accept_filter_arg *afap = NULL; 1561 struct accept_filter *afp; 1562 struct so_accf *af = so->so_accf; 1563 int error = 0; 1564 1565 /* do not set/remove accept filters on non listen sockets */ 1566 if ((so->so_options & SO_ACCEPTCONN) == 0) { 1567 error = EINVAL; 1568 goto out; 1569 } 1570 1571 /* removing the filter */ 1572 if (sopt == NULL) { 1573 if (af != NULL) { 1574 if (af->so_accept_filter != NULL && 1575 af->so_accept_filter->accf_destroy != NULL) { 1576 af->so_accept_filter->accf_destroy(so); 1577 } 1578 if (af->so_accept_filter_str != NULL) { 1579 kfree(af->so_accept_filter_str, M_ACCF); 1580 } 1581 kfree(af, M_ACCF); 1582 so->so_accf = NULL; 1583 } 1584 so->so_options &= ~SO_ACCEPTFILTER; 1585 return (0); 1586 } 1587 /* adding a filter */ 1588 /* must remove previous filter first */ 1589 if (af != NULL) { 1590 error = EINVAL; 1591 goto out; 1592 } 1593 /* don't put large objects on the kernel stack */ 1594 afap = kmalloc(sizeof(*afap), M_TEMP, M_WAITOK); 1595 error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap); 1596 afap->af_name[sizeof(afap->af_name)-1] = '\0'; 1597 afap->af_arg[sizeof(afap->af_arg)-1] = '\0'; 1598 if (error) 1599 goto out; 1600 afp = accept_filt_get(afap->af_name); 1601 if (afp == NULL) { 1602 error = ENOENT; 1603 goto out; 1604 } 1605 af = kmalloc(sizeof(*af), M_ACCF, M_WAITOK | M_ZERO); 1606 if (afp->accf_create != NULL) { 1607 if (afap->af_name[0] != '\0') { 1608 int len = strlen(afap->af_name) + 1; 1609 1610 af->so_accept_filter_str = kmalloc(len, M_ACCF, 1611 M_WAITOK); 1612 strcpy(af->so_accept_filter_str, afap->af_name); 1613 } 1614 af->so_accept_filter_arg = afp->accf_create(so, afap->af_arg); 1615 if (af->so_accept_filter_arg == NULL) { 1616 kfree(af->so_accept_filter_str, M_ACCF); 1617 kfree(af, M_ACCF); 1618 so->so_accf = NULL; 1619 error = EINVAL; 1620 goto out; 1621 } 1622 } 1623 af->so_accept_filter = afp; 1624 so->so_accf = af; 1625 so->so_options |= SO_ACCEPTFILTER; 1626 out: 1627 if (afap != NULL) 1628 kfree(afap, M_TEMP); 1629 return (error); 1630 } 1631 #endif /* INET */ 1632 1633 /* 1634 * Perhaps this routine, and sooptcopyout(), below, ought to come in 1635 * an additional variant to handle the case where the option value needs 1636 * to be some kind of integer, but not a specific size. 1637 * In addition to their use here, these functions are also called by the 1638 * protocol-level pr_ctloutput() routines. 1639 */ 1640 int 1641 sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) 1642 { 1643 return soopt_to_kbuf(sopt, buf, len, minlen); 1644 } 1645 1646 int 1647 soopt_to_kbuf(struct sockopt *sopt, void *buf, size_t len, size_t minlen) 1648 { 1649 size_t valsize; 1650 1651 KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val)); 1652 KKASSERT(kva_p(buf)); 1653 1654 /* 1655 * If the user gives us more than we wanted, we ignore it, 1656 * but if we don't get the minimum length the caller 1657 * wants, we return EINVAL. On success, sopt->sopt_valsize 1658 * is set to however much we actually retrieved. 1659 */ 1660 if ((valsize = sopt->sopt_valsize) < minlen) 1661 return EINVAL; 1662 if (valsize > len) 1663 sopt->sopt_valsize = valsize = len; 1664 1665 bcopy(sopt->sopt_val, buf, valsize); 1666 return 0; 1667 } 1668 1669 1670 int 1671 sosetopt(struct socket *so, struct sockopt *sopt) 1672 { 1673 int error, optval; 1674 struct linger l; 1675 struct timeval tv; 1676 u_long val; 1677 struct signalsockbuf *sotmp; 1678 1679 error = 0; 1680 sopt->sopt_dir = SOPT_SET; 1681 if (sopt->sopt_level != SOL_SOCKET) { 1682 if (so->so_proto && so->so_proto->pr_ctloutput) { 1683 return (so_pr_ctloutput(so, sopt)); 1684 } 1685 error = ENOPROTOOPT; 1686 } else { 1687 switch (sopt->sopt_name) { 1688 #ifdef INET 1689 case SO_ACCEPTFILTER: 1690 error = do_setopt_accept_filter(so, sopt); 1691 if (error) 1692 goto bad; 1693 break; 1694 #endif /* INET */ 1695 case SO_LINGER: 1696 error = sooptcopyin(sopt, &l, sizeof l, sizeof l); 1697 if (error) 1698 goto bad; 1699 1700 so->so_linger = l.l_linger; 1701 if (l.l_onoff) 1702 so->so_options |= SO_LINGER; 1703 else 1704 so->so_options &= ~SO_LINGER; 1705 break; 1706 1707 case SO_DEBUG: 1708 case SO_KEEPALIVE: 1709 case SO_DONTROUTE: 1710 case SO_USELOOPBACK: 1711 case SO_BROADCAST: 1712 case SO_REUSEADDR: 1713 case SO_REUSEPORT: 1714 case SO_OOBINLINE: 1715 case SO_TIMESTAMP: 1716 error = sooptcopyin(sopt, &optval, sizeof optval, 1717 sizeof optval); 1718 if (error) 1719 goto bad; 1720 if (optval) 1721 so->so_options |= sopt->sopt_name; 1722 else 1723 so->so_options &= ~sopt->sopt_name; 1724 break; 1725 1726 case SO_SNDBUF: 1727 case SO_RCVBUF: 1728 case SO_SNDLOWAT: 1729 case SO_RCVLOWAT: 1730 error = sooptcopyin(sopt, &optval, sizeof optval, 1731 sizeof optval); 1732 if (error) 1733 goto bad; 1734 1735 /* 1736 * Values < 1 make no sense for any of these 1737 * options, so disallow them. 1738 */ 1739 if (optval < 1) { 1740 error = EINVAL; 1741 goto bad; 1742 } 1743 1744 switch (sopt->sopt_name) { 1745 case SO_SNDBUF: 1746 case SO_RCVBUF: 1747 if (ssb_reserve(sopt->sopt_name == SO_SNDBUF ? 1748 &so->so_snd : &so->so_rcv, (u_long)optval, 1749 so, 1750 &curproc->p_rlimit[RLIMIT_SBSIZE]) == 0) { 1751 error = ENOBUFS; 1752 goto bad; 1753 } 1754 sotmp = (sopt->sopt_name == SO_SNDBUF) ? 1755 &so->so_snd : &so->so_rcv; 1756 atomic_clear_int(&sotmp->ssb_flags, 1757 SSB_AUTOSIZE); 1758 break; 1759 1760 /* 1761 * Make sure the low-water is never greater than 1762 * the high-water. 1763 */ 1764 case SO_SNDLOWAT: 1765 so->so_snd.ssb_lowat = 1766 (optval > so->so_snd.ssb_hiwat) ? 1767 so->so_snd.ssb_hiwat : optval; 1768 atomic_clear_int(&so->so_snd.ssb_flags, 1769 SSB_AUTOLOWAT); 1770 break; 1771 case SO_RCVLOWAT: 1772 so->so_rcv.ssb_lowat = 1773 (optval > so->so_rcv.ssb_hiwat) ? 1774 so->so_rcv.ssb_hiwat : optval; 1775 atomic_clear_int(&so->so_rcv.ssb_flags, 1776 SSB_AUTOLOWAT); 1777 break; 1778 } 1779 break; 1780 1781 case SO_SNDTIMEO: 1782 case SO_RCVTIMEO: 1783 error = sooptcopyin(sopt, &tv, sizeof tv, 1784 sizeof tv); 1785 if (error) 1786 goto bad; 1787 1788 /* assert(hz > 0); */ 1789 if (tv.tv_sec < 0 || tv.tv_sec > INT_MAX / hz || 1790 tv.tv_usec < 0 || tv.tv_usec >= 1000000) { 1791 error = EDOM; 1792 goto bad; 1793 } 1794 /* assert(tick > 0); */ 1795 /* assert(ULONG_MAX - INT_MAX >= 1000000); */ 1796 val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / ustick; 1797 if (val > INT_MAX) { 1798 error = EDOM; 1799 goto bad; 1800 } 1801 if (val == 0 && tv.tv_usec != 0) 1802 val = 1; 1803 1804 switch (sopt->sopt_name) { 1805 case SO_SNDTIMEO: 1806 so->so_snd.ssb_timeo = val; 1807 break; 1808 case SO_RCVTIMEO: 1809 so->so_rcv.ssb_timeo = val; 1810 break; 1811 } 1812 break; 1813 default: 1814 error = ENOPROTOOPT; 1815 break; 1816 } 1817 if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { 1818 (void) so_pr_ctloutput(so, sopt); 1819 } 1820 } 1821 bad: 1822 return (error); 1823 } 1824 1825 /* Helper routine for getsockopt */ 1826 int 1827 sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) 1828 { 1829 soopt_from_kbuf(sopt, buf, len); 1830 return 0; 1831 } 1832 1833 void 1834 soopt_from_kbuf(struct sockopt *sopt, const void *buf, size_t len) 1835 { 1836 size_t valsize; 1837 1838 if (len == 0) { 1839 sopt->sopt_valsize = 0; 1840 return; 1841 } 1842 1843 KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val)); 1844 KKASSERT(kva_p(buf)); 1845 1846 /* 1847 * Documented get behavior is that we always return a value, 1848 * possibly truncated to fit in the user's buffer. 1849 * Traditional behavior is that we always tell the user 1850 * precisely how much we copied, rather than something useful 1851 * like the total amount we had available for her. 1852 * Note that this interface is not idempotent; the entire answer must 1853 * generated ahead of time. 1854 */ 1855 valsize = szmin(len, sopt->sopt_valsize); 1856 sopt->sopt_valsize = valsize; 1857 if (sopt->sopt_val != 0) { 1858 bcopy(buf, sopt->sopt_val, valsize); 1859 } 1860 } 1861 1862 int 1863 sogetopt(struct socket *so, struct sockopt *sopt) 1864 { 1865 int error, optval; 1866 long optval_l; 1867 struct linger l; 1868 struct timeval tv; 1869 #ifdef INET 1870 struct accept_filter_arg *afap; 1871 #endif 1872 1873 error = 0; 1874 sopt->sopt_dir = SOPT_GET; 1875 if (sopt->sopt_level != SOL_SOCKET) { 1876 if (so->so_proto && so->so_proto->pr_ctloutput) { 1877 return (so_pr_ctloutput(so, sopt)); 1878 } else 1879 return (ENOPROTOOPT); 1880 } else { 1881 switch (sopt->sopt_name) { 1882 #ifdef INET 1883 case SO_ACCEPTFILTER: 1884 if ((so->so_options & SO_ACCEPTCONN) == 0) 1885 return (EINVAL); 1886 afap = kmalloc(sizeof(*afap), M_TEMP, 1887 M_WAITOK | M_ZERO); 1888 if ((so->so_options & SO_ACCEPTFILTER) != 0) { 1889 strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name); 1890 if (so->so_accf->so_accept_filter_str != NULL) 1891 strcpy(afap->af_arg, so->so_accf->so_accept_filter_str); 1892 } 1893 error = sooptcopyout(sopt, afap, sizeof(*afap)); 1894 kfree(afap, M_TEMP); 1895 break; 1896 #endif /* INET */ 1897 1898 case SO_LINGER: 1899 l.l_onoff = so->so_options & SO_LINGER; 1900 l.l_linger = so->so_linger; 1901 error = sooptcopyout(sopt, &l, sizeof l); 1902 break; 1903 1904 case SO_USELOOPBACK: 1905 case SO_DONTROUTE: 1906 case SO_DEBUG: 1907 case SO_KEEPALIVE: 1908 case SO_REUSEADDR: 1909 case SO_REUSEPORT: 1910 case SO_BROADCAST: 1911 case SO_OOBINLINE: 1912 case SO_TIMESTAMP: 1913 optval = so->so_options & sopt->sopt_name; 1914 integer: 1915 error = sooptcopyout(sopt, &optval, sizeof optval); 1916 break; 1917 1918 case SO_TYPE: 1919 optval = so->so_type; 1920 goto integer; 1921 1922 case SO_ERROR: 1923 optval = so->so_error; 1924 so->so_error = 0; 1925 goto integer; 1926 1927 case SO_SNDBUF: 1928 optval = so->so_snd.ssb_hiwat; 1929 goto integer; 1930 1931 case SO_RCVBUF: 1932 optval = so->so_rcv.ssb_hiwat; 1933 goto integer; 1934 1935 case SO_SNDLOWAT: 1936 optval = so->so_snd.ssb_lowat; 1937 goto integer; 1938 1939 case SO_RCVLOWAT: 1940 optval = so->so_rcv.ssb_lowat; 1941 goto integer; 1942 1943 case SO_SNDTIMEO: 1944 case SO_RCVTIMEO: 1945 optval = (sopt->sopt_name == SO_SNDTIMEO ? 1946 so->so_snd.ssb_timeo : so->so_rcv.ssb_timeo); 1947 1948 tv.tv_sec = optval / hz; 1949 tv.tv_usec = (optval % hz) * ustick; 1950 error = sooptcopyout(sopt, &tv, sizeof tv); 1951 break; 1952 1953 case SO_SNDSPACE: 1954 optval_l = ssb_space(&so->so_snd); 1955 error = sooptcopyout(sopt, &optval_l, sizeof(optval_l)); 1956 break; 1957 1958 default: 1959 error = ENOPROTOOPT; 1960 break; 1961 } 1962 return (error); 1963 } 1964 } 1965 1966 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ 1967 int 1968 soopt_getm(struct sockopt *sopt, struct mbuf **mp) 1969 { 1970 struct mbuf *m, *m_prev; 1971 int sopt_size = sopt->sopt_valsize, msize; 1972 1973 m = m_getl(sopt_size, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT, MT_DATA, 1974 0, &msize); 1975 if (m == NULL) 1976 return (ENOBUFS); 1977 m->m_len = min(msize, sopt_size); 1978 sopt_size -= m->m_len; 1979 *mp = m; 1980 m_prev = m; 1981 1982 while (sopt_size > 0) { 1983 m = m_getl(sopt_size, sopt->sopt_td ? MB_WAIT : MB_DONTWAIT, 1984 MT_DATA, 0, &msize); 1985 if (m == NULL) { 1986 m_freem(*mp); 1987 return (ENOBUFS); 1988 } 1989 m->m_len = min(msize, sopt_size); 1990 sopt_size -= m->m_len; 1991 m_prev->m_next = m; 1992 m_prev = m; 1993 } 1994 return (0); 1995 } 1996 1997 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */ 1998 int 1999 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) 2000 { 2001 soopt_to_mbuf(sopt, m); 2002 return 0; 2003 } 2004 2005 void 2006 soopt_to_mbuf(struct sockopt *sopt, struct mbuf *m) 2007 { 2008 size_t valsize; 2009 void *val; 2010 2011 KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val)); 2012 KKASSERT(kva_p(m)); 2013 if (sopt->sopt_val == NULL) 2014 return; 2015 val = sopt->sopt_val; 2016 valsize = sopt->sopt_valsize; 2017 while (m != NULL && valsize >= m->m_len) { 2018 bcopy(val, mtod(m, char *), m->m_len); 2019 valsize -= m->m_len; 2020 val = (caddr_t)val + m->m_len; 2021 m = m->m_next; 2022 } 2023 if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ 2024 panic("ip6_sooptmcopyin"); 2025 } 2026 2027 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ 2028 int 2029 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) 2030 { 2031 return soopt_from_mbuf(sopt, m); 2032 } 2033 2034 int 2035 soopt_from_mbuf(struct sockopt *sopt, struct mbuf *m) 2036 { 2037 struct mbuf *m0 = m; 2038 size_t valsize = 0; 2039 size_t maxsize; 2040 void *val; 2041 2042 KKASSERT(!sopt->sopt_val || kva_p(sopt->sopt_val)); 2043 KKASSERT(kva_p(m)); 2044 if (sopt->sopt_val == NULL) 2045 return 0; 2046 val = sopt->sopt_val; 2047 maxsize = sopt->sopt_valsize; 2048 while (m != NULL && maxsize >= m->m_len) { 2049 bcopy(mtod(m, char *), val, m->m_len); 2050 maxsize -= m->m_len; 2051 val = (caddr_t)val + m->m_len; 2052 valsize += m->m_len; 2053 m = m->m_next; 2054 } 2055 if (m != NULL) { 2056 /* enough soopt buffer should be given from user-land */ 2057 m_freem(m0); 2058 return (EINVAL); 2059 } 2060 sopt->sopt_valsize = valsize; 2061 return 0; 2062 } 2063 2064 void 2065 sohasoutofband(struct socket *so) 2066 { 2067 if (so->so_sigio != NULL) 2068 pgsigio(so->so_sigio, SIGURG, 0); 2069 KNOTE(&so->so_rcv.ssb_kq.ki_note, NOTE_OOB); 2070 } 2071 2072 int 2073 sokqfilter(struct file *fp, struct knote *kn) 2074 { 2075 struct socket *so = (struct socket *)kn->kn_fp->f_data; 2076 struct signalsockbuf *ssb; 2077 2078 switch (kn->kn_filter) { 2079 case EVFILT_READ: 2080 if (so->so_options & SO_ACCEPTCONN) 2081 kn->kn_fop = &solisten_filtops; 2082 else 2083 kn->kn_fop = &soread_filtops; 2084 ssb = &so->so_rcv; 2085 break; 2086 case EVFILT_WRITE: 2087 kn->kn_fop = &sowrite_filtops; 2088 ssb = &so->so_snd; 2089 break; 2090 case EVFILT_EXCEPT: 2091 kn->kn_fop = &soexcept_filtops; 2092 ssb = &so->so_rcv; 2093 break; 2094 default: 2095 return (EOPNOTSUPP); 2096 } 2097 2098 knote_insert(&ssb->ssb_kq.ki_note, kn); 2099 atomic_set_int(&ssb->ssb_flags, SSB_KNOTE); 2100 return (0); 2101 } 2102 2103 static void 2104 filt_sordetach(struct knote *kn) 2105 { 2106 struct socket *so = (struct socket *)kn->kn_fp->f_data; 2107 2108 knote_remove(&so->so_rcv.ssb_kq.ki_note, kn); 2109 if (SLIST_EMPTY(&so->so_rcv.ssb_kq.ki_note)) 2110 atomic_clear_int(&so->so_rcv.ssb_flags, SSB_KNOTE); 2111 } 2112 2113 /*ARGSUSED*/ 2114 static int 2115 filt_soread(struct knote *kn, long hint) 2116 { 2117 struct socket *so = (struct socket *)kn->kn_fp->f_data; 2118 2119 if (kn->kn_sfflags & NOTE_OOB) { 2120 if ((so->so_oobmark || (so->so_state & SS_RCVATMARK))) { 2121 kn->kn_fflags |= NOTE_OOB; 2122 return (1); 2123 } 2124 return (0); 2125 } 2126 kn->kn_data = so->so_rcv.ssb_cc; 2127 2128 if (so->so_state & SS_CANTRCVMORE) { 2129 /* 2130 * Only set NODATA if all data has been exhausted. 2131 */ 2132 if (kn->kn_data == 0) 2133 kn->kn_flags |= EV_NODATA; 2134 kn->kn_flags |= EV_EOF; 2135 kn->kn_fflags = so->so_error; 2136 return (1); 2137 } 2138 if (so->so_error) /* temporary udp error */ 2139 return (1); 2140 if (kn->kn_sfflags & NOTE_LOWAT) 2141 return (kn->kn_data >= kn->kn_sdata); 2142 return ((kn->kn_data >= so->so_rcv.ssb_lowat) || 2143 !TAILQ_EMPTY(&so->so_comp)); 2144 } 2145 2146 static void 2147 filt_sowdetach(struct knote *kn) 2148 { 2149 struct socket *so = (struct socket *)kn->kn_fp->f_data; 2150 2151 knote_remove(&so->so_snd.ssb_kq.ki_note, kn); 2152 if (SLIST_EMPTY(&so->so_snd.ssb_kq.ki_note)) 2153 atomic_clear_int(&so->so_snd.ssb_flags, SSB_KNOTE); 2154 } 2155 2156 /*ARGSUSED*/ 2157 static int 2158 filt_sowrite(struct knote *kn, long hint) 2159 { 2160 struct socket *so = (struct socket *)kn->kn_fp->f_data; 2161 2162 kn->kn_data = ssb_space(&so->so_snd); 2163 if (so->so_state & SS_CANTSENDMORE) { 2164 kn->kn_flags |= (EV_EOF | EV_NODATA); 2165 kn->kn_fflags = so->so_error; 2166 return (1); 2167 } 2168 if (so->so_error) /* temporary udp error */ 2169 return (1); 2170 if (((so->so_state & SS_ISCONNECTED) == 0) && 2171 (so->so_proto->pr_flags & PR_CONNREQUIRED)) 2172 return (0); 2173 if (kn->kn_sfflags & NOTE_LOWAT) 2174 return (kn->kn_data >= kn->kn_sdata); 2175 return (kn->kn_data >= so->so_snd.ssb_lowat); 2176 } 2177 2178 /*ARGSUSED*/ 2179 static int 2180 filt_solisten(struct knote *kn, long hint) 2181 { 2182 struct socket *so = (struct socket *)kn->kn_fp->f_data; 2183 2184 kn->kn_data = so->so_qlen; 2185 return (! TAILQ_EMPTY(&so->so_comp)); 2186 } 2187