/*	$OpenBSD: uipc_socket.c,v 1.89 2011/04/04 11:10:26 claudio Exp $	*/
/*	$NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/unpcb.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <net/route.h>
#include <sys/pool.h>

int	sosplice(struct socket *, int, off_t);
int	somove(struct socket *, int);

void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };

#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

struct pool socket_pool;

void
soinit(void)
{
	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0, "sockpl", NULL);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
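
/*
 * Illustrative sketch (assumed userland code, not part of this file):
 * the system calls that land in the routines below are roughly
 *
 *	int s = socket(AF_INET, SOCK_STREAM, 0);	socket(2) -> socreate()
 *	struct sockaddr_in sin = { sizeof(sin), AF_INET };
 *	bind(s, (struct sockaddr *)&sin, sizeof(sin));	bind(2)   -> sobind()
 *	listen(s, 5);					listen(2) -> solisten()
 */
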
/*ARGSUSED*/
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = curproc;		/* XXX */
	struct protosw *prp;
	struct socket *so;
	int error, s;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	if (suser(p, 0) == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_cred->p_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_cred->p_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_cpid = p->p_pid;
	so->so_proto = prp;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
	    (struct mbuf *)(long)proto, NULL, p);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
	splx(s);
	*aso = so;
	return (0);
}

int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	int s = splsoftnet();
	int error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
	splx(s);
	return (error);
}

int
solisten(struct socket *so, int backlog)
{
	int s, error;

#ifdef SOCKET_SPLICE
	if (so->so_splice || so->so_spliceback)
		return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
	s = splsoftnet();
	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
	    curproc);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	if (backlog < sominconn)
		backlog = sominconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Must be called at splsoftnet()
 */
void
sofree(struct socket *so)
{
	splsoftassert(IPL_SOFTNET);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
#ifdef SOCKET_SPLICE
	if (so->so_spliceback) {
		so->so_snd.sb_flags &= ~SB_SPLICE;
		so->so_spliceback->so_rcv.sb_flags &= ~SB_SPLICE;
		so->so_spliceback->so_splice = NULL;
		if (soreadable(so->so_spliceback))
			sorwakeup(so->so_spliceback);
	}
	if (so->so_splice) {
		so->so_splice->so_snd.sb_flags &= ~SB_SPLICE;
		so->so_rcv.sb_flags &= ~SB_SPLICE;
		so->so_splice->so_spliceback = NULL;
	}
	so->so_spliceback = so->so_splice = NULL;
#endif /* SOCKET_SPLICE */
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}
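
/*
 * Note on sofree(): it only takes effect once the protocol has detached
 * (so_pcb == NULL) and the last descriptor reference is gone, so its
 * callers mark the socket first, e.g.
 *
 *	so->so_state |= SS_NOFDREF;
 *	sofree(so);
 *
 * as in the socreate() error path above and in soclose() below.
 */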

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so)
{
	struct socket *so2;
	int s = splsoftnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep(&so->so_timeo,
				    PSOCK | PCATCH, "netcls",
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL,
		    NULL, NULL, curproc);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splsoftnet.
 */
int
soabort(struct socket *so)
{
	splsoftassert(IPL_SOFTNET);

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL,
	    curproc);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int s = splsoftnet();
	int error = 0;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
		    nam, NULL, curproc);
	else
		error = ECONNABORTED;
	splx(s);
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    NULL, nam, NULL, curproc);
	splx(s);
	return (error);
}

int
soconnect2(struct socket *so1, struct socket *so2)
{
	int s = splsoftnet();
	int error;

	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
	    (struct mbuf *)so2, NULL, curproc);
	splx(s);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int s = splsoftnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
	    NULL, curproc);
bad:
	splx(s);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
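
/*
 * SBLOCKWAIT() above maps the per-call MSG_DONTWAIT flag onto the wait
 * argument that sblock() expects: a non-blocking request asks for
 * M_NOWAIT, anything else may sleep with M_WAITOK.
 */
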
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	struct mbuf **mp;
	struct mbuf *m;
	long space, len, mlen, clen = 0;
	quad_t resid;
	int error, s, dontroute;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned (since uio->uio_resid is).
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 * MSG_EOR on a SOCK_STREAM socket is also invalid.
	 */
	if (resid < 0 ||
	    (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (uio && uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	so->so_state |= SS_ISSENDING;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			so->so_state &= ~SS_ISSENDING;
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_NOWAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr,
						    resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
					space -= len;
				} else {
nopages:
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splsoftnet();		/* XXX */
			if (resid <= 0)
				so->so_state &= ~SS_ISSENDING;
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control, curproc);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	so->so_state &= ~SS_ISSENDING;
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
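
/*
 * Illustrative sketch (assumed userland code, not part of this file):
 * since a send may be interrupted after transferring part of the data,
 * a careful writer retries on short counts and EINTR:
 *
 *	size_t off = 0;
 *	while (off < len) {
 *		ssize_t n = send(s, buf + off, len - off, 0);
 *		if (n == -1) {
 *			if (errno == EINTR)
 *				continue;
 *			break;
 *		}
 *		off += n;
 *	}
 */
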
/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
    socklen_t controllen)
{
	struct mbuf *m, **mp;
	int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	size_t orig_resid = uio->uio_resid;
	int uio_error = 0;
	int resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL, curproc);

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
	if (so->so_splice)
		m = NULL;
#endif /* SOCKET_SPLICE */
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
#ifdef SOCKET_SPLICE
		    if (so->so_splice == NULL)
#endif /* SOCKET_SPLICE */
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else if (so->so_rcv.sb_cc == 0)
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * While we process the initial mbufs containing address and control
	 * info, we save a copy of m->m_nextpkt into nextrecord.
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error =
					    (*pr->pr_domain->dom_externalize)(m,
					    controllen);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					pr->pr_domain->dom_dispose(m);
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
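
	/*
	 * Note the asymmetry above: when the caller supplied a controlp,
	 * an SCM_RIGHTS control message is externalized into descriptors
	 * for the receiving process; without one it must be disposed of
	 * here, or the in-flight file references would leak.
	 */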
	/*
	 * If m is non-NULL, we have some data to read.  From now on,
	 * make sure to keep sb_lastrecord consistent when working on
	 * the last packet on the chain (nextrecord == NULL) and we
	 * change m->m_nextpkt.
	 */
	if (m) {
		if ((flags & MSG_PEEK) == 0) {
			m->m_nextpkt = nextrecord;
			/*
			 * If nextrecord == NULL (this is a single chain),
			 * then sb_lastrecord may not be valid here if m
			 * was changed earlier.
			 */
			if (nextrecord == NULL) {
				KASSERT(so->so_rcv.sb_mb == m);
				so->so_rcv.sb_lastrecord = m;
			}
		}
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	} else {
		if ((flags & MSG_PEEK) == 0) {
			KASSERT(so->so_rcv.sb_mb == m);
			so->so_rcv.sb_mb = nextrecord;
			SB_EMPTY_FIXUP(&so->so_rcv);
		}
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			splx(s);
			uio_error =
			    uiomove(mtod(m, caddr_t) + moff, (int)len,
			    uio);
			s = splsoftnet();
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL, curproc);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

int
soshutdown(struct socket *so, int how)
{
	struct protosw *pr = so->so_proto;

	switch (how) {
	case SHUT_RD:
	case SHUT_RDWR:
		sorflush(so);
		if (how == SHUT_RD)
			return (0);
		/* FALLTHROUGH */
	case SHUT_WR:
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL,
		    curproc);
	default:
		return (EINVAL);
	}
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct protosw *pr = so->so_proto;
	int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splnet();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero(sb, sizeof (*sb));
	/* XXX - the bzero stumps all over so_rcv */
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
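
/*
 * Illustrative sketch (assumed userland code, not part of this file):
 * sosplice() below is reached via setsockopt(2).  To splice data from
 * socket "from" into socket "to", pass the drain descriptor, or a
 * struct splice to also set a maximum byte count:
 *
 *	setsockopt(from, SOL_SOCKET, SO_SPLICE, &to, sizeof(int));
 *
 *	struct splice sp = { .sp_fd = to, .sp_max = 1024 };
 *	setsockopt(from, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp));
 *
 * A call with no payload, or a negative descriptor, removes the splice
 * again, as the fd < 0 case below shows.
 */
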
#ifdef SOCKET_SPLICE
int
sosplice(struct socket *so, int fd, off_t max)
{
	struct file *fp;
	struct socket *sosp;
	int s, error = 0;

	if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
		return (EPROTONOSUPPORT);
	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0)
		return (ENOTCONN);

	/* If no fd is given, unsplice by removing existing link. */
	if (fd < 0) {
		s = splsoftnet();
		if (so->so_splice) {
			so->so_splice->so_snd.sb_flags &= ~SB_SPLICE;
			so->so_rcv.sb_flags &= ~SB_SPLICE;
			so->so_splice->so_spliceback = NULL;
			so->so_splice = NULL;
			if (soreadable(so))
				sorwakeup(so);
		}
		splx(s);
		return (0);
	}

	if (max && max < 0)
		return (EINVAL);

	/* Find sosp, the drain socket where data will be spliced into. */
	if ((error = getsock(curproc->p_fd, fd, &fp)) != 0)
		return (error);
	sosp = fp->f_data;

	/* Lock both receive and send buffer. */
	if ((error = sblock(&so->so_rcv,
	    (so->so_state & SS_NBIO) ? M_NOWAIT : M_WAITOK)) != 0) {
		FRELE(fp);
		return (error);
	}
	if ((error = sblock(&sosp->so_snd, M_WAITOK)) != 0) {
		sbunlock(&so->so_rcv);
		FRELE(fp);
		return (error);
	}
	s = splsoftnet();

	if (so->so_splice || sosp->so_spliceback) {
		error = EBUSY;
		goto release;
	}
	if (sosp->so_proto->pr_usrreq != so->so_proto->pr_usrreq) {
		error = EPROTONOSUPPORT;
		goto release;
	}
	if (sosp->so_options & SO_ACCEPTCONN) {
		error = EOPNOTSUPP;
		goto release;
	}
	if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
		error = ENOTCONN;
		goto release;
	}

	/* Splice so and sosp together. */
	so->so_splice = sosp;
	sosp->so_spliceback = so;
	so->so_splicelen = 0;
	so->so_splicemax = max;

	/*
	 * To prevent softnet interrupt from calling somove() while
	 * we sleep, the socket buffers are not marked as spliced yet.
	 */
	if (somove(so, M_WAIT)) {
		so->so_rcv.sb_flags |= SB_SPLICE;
		sosp->so_snd.sb_flags |= SB_SPLICE;
	}

release:
	splx(s);
	sbunlock(&sosp->so_snd);
	sbunlock(&so->so_rcv);
	FRELE(fp);
	return (error);
}

/*
 * Move data from receive buffer of spliced source socket to send
 * buffer of drain socket.  Try to move as much as possible in one
 * big chunk.  It is a TCP only implementation.
 * Return value 0 means splicing has been finished, 1 continue.
 */
int
somove(struct socket *so, int wait)
{
	struct socket *sosp = so->so_splice;
	struct mbuf *m = NULL, **mp;
	u_long len, off, oobmark;
	long space;
	int error = 0, maxreached = 0;
	short state;

	splsoftassert(IPL_SOFTNET);

	if (so->so_error) {
		error = so->so_error;
		goto release;
	}
	if (sosp->so_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto release;
	}
	if (sosp->so_error) {
		error = sosp->so_error;
		goto release;
	}
	if ((sosp->so_state & SS_ISCONNECTED) == 0)
		goto release;

	/* Calculate how many bytes can be copied now. */
	len = so->so_rcv.sb_cc;
	if (len == 0)
		goto release;
	if (so->so_splicemax) {
		KASSERT(so->so_splicelen < so->so_splicemax);
		if (so->so_splicemax <= so->so_splicelen + len) {
			len = so->so_splicemax - so->so_splicelen;
			maxreached = 1;
		}
	}
	space = sbspace(&sosp->so_snd);
	if (so->so_oobmark && so->so_oobmark < len &&
	    so->so_oobmark < space + 1024)
		space += 1024;
	if (space <= 0) {
		maxreached = 0;
		goto release;
	}
	if (space < len) {
		maxreached = 0;
		if (space < sosp->so_snd.sb_lowat)
			goto release;
		len = space;
	}
	sosp->so_state |= SS_ISSENDING;

	/* Take at most len mbufs out of receive buffer. */
	m = so->so_rcv.sb_mb;
	for (off = 0, mp = &m; off < len;
	    off += (*mp)->m_len, mp = &(*mp)->m_next) {
		u_long size = len - off;

		if ((*mp)->m_len > size) {
			if (!maxreached || (*mp = m_copym(
			    so->so_rcv.sb_mb, 0, size, wait)) == NULL) {
				len -= size;
				break;
			}
			so->so_rcv.sb_mb->m_data += size;
			so->so_rcv.sb_mb->m_len -= size;
			so->so_rcv.sb_cc -= size;
			so->so_rcv.sb_datacc -= size;
		} else {
			*mp = so->so_rcv.sb_mb;
			sbfree(&so->so_rcv, *mp);
			so->so_rcv.sb_mb = (*mp)->m_next;
		}
	}
	*mp = NULL;
	SB_EMPTY_FIXUP(&so->so_rcv);
	so->so_rcv.sb_lastrecord = so->so_rcv.sb_mb;

	SBLASTRECORDCHK(&so->so_rcv, "somove");
	SBLASTMBUFCHK(&so->so_rcv, "somove");
	KDASSERT(m->m_nextpkt == NULL);
	KASSERT(so->so_rcv.sb_mb == so->so_rcv.sb_lastrecord);
#ifdef SOCKBUF_DEBUG
	sbcheck(&so->so_rcv);
#endif

	/* Send window update to source peer if receive buffer has changed. */
	if (m)
		(so->so_proto->pr_usrreq)(so, PRU_RCVD, NULL,
		    (struct mbuf *)0L, NULL, NULL);

	/* Receive buffer did shrink by len bytes, adjust oob. */
	state = so->so_state;
	so->so_state &= ~SS_RCVATMARK;
	oobmark = so->so_oobmark;
	so->so_oobmark = oobmark > len ? oobmark - len : 0;
	if (oobmark) {
		if (oobmark == len)
			so->so_state |= SS_RCVATMARK;
		if (oobmark >= len)
			oobmark = 0;
	}

	/*
	 * Handle oob data.  If any malloc fails, ignore error.
	 * TCP urgent data is not very reliable anyway.
	 */
	while (m && ((state & SS_RCVATMARK) || oobmark) &&
	    (so->so_options & SO_OOBINLINE)) {
		struct mbuf *o = NULL;

		if (state & SS_RCVATMARK) {
			o = m_get(wait, MT_DATA);
			state &= ~SS_RCVATMARK;
		} else if (oobmark) {
			o = m_split(m, oobmark, wait);
			if (o) {
				error = (*sosp->so_proto->pr_usrreq)(sosp,
				    PRU_SEND, m, NULL, NULL, NULL);
				m = NULL;
				if (error) {
					m_freem(o);
					if (sosp->so_state & SS_CANTSENDMORE)
						error = EPIPE;
					goto release;
				}
				len -= oobmark;
				so->so_splicelen += oobmark;
				m = o;
				o = m_get(wait, MT_DATA);
			}
			oobmark = 0;
		}
		if (o) {
			o->m_len = 1;
			*mtod(o, caddr_t) = *mtod(m, caddr_t);
			error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SENDOOB,
			    o, NULL, NULL, NULL);
			if (error) {
				if (sosp->so_state & SS_CANTSENDMORE)
					error = EPIPE;
				goto release;
			}
			len -= 1;
			so->so_splicelen += 1;
			if (oobmark) {
				oobmark -= 1;
				if (oobmark == 0)
					state |= SS_RCVATMARK;
			}
			m_adj(m, 1);
		}
	}

	/* Append all remaining data to drain socket. */
	if (m) {
		if (so->so_rcv.sb_cc == 0 || maxreached)
			sosp->so_state &= ~SS_ISSENDING;
		error = (*sosp->so_proto->pr_usrreq)(sosp, PRU_SEND, m, NULL,
		    NULL, NULL);
		m = NULL;
		if (error) {
			if (sosp->so_state & SS_CANTSENDMORE)
				error = EPIPE;
			goto release;
		}
		so->so_splicelen += len;
	}

release:
	if (m)
		m_freem(m);
	sosp->so_state &= ~SS_ISSENDING;
	if (error)
		so->so_error = error;
	if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
	    (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
		sosp->so_snd.sb_flags &= ~SB_SPLICE;
		so->so_rcv.sb_flags &= ~SB_SPLICE;
		so->so_splice = sosp->so_spliceback = NULL;
		if (soreadable(so))
			sorwakeup(so);
		return (0);
	}
	return (1);
}

void
sorwakeup(struct socket *so)
{
	if (so->so_rcv.sb_flags & SB_SPLICE) {
		(void) somove(so, M_DONTWAIT);
		return;
	}
	_sorwakeup(so);
}

void
sowwakeup(struct socket *so)
{
	if (so->so_snd.sb_flags & SB_SPLICE)
		(void) somove(so->so_spliceback, M_DONTWAIT);
	_sowwakeup(so);
}
#endif /* SOCKET_SPLICE */

int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m0)
{
	int error = 0;
	struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {
		case SO_BINDANY:
			if ((error = suser(curproc, 0)) != 0)	/* XXX */
				goto bad;
			break;
		}

		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* FALLTHROUGH */

		case SO_BINDANY:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_JUMBO:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;
			switch (optname) {

			case SO_SNDBUF:
				if (so->so_state & SS_CANTSENDMORE) {
					error = EINVAL;
					goto bad;
				}
				if (sbcheckreserve(cnt, so->so_snd.sb_wat) ||
				    sbreserve(&so->so_snd, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				so->so_snd.sb_wat = cnt;
				break;

			case SO_RCVBUF:
				if (so->so_state & SS_CANTRCVMORE) {
					error = EINVAL;
					goto bad;
				}
				if (sbcheckreserve(cnt, so->so_rcv.sb_wat) ||
				    sbreserve(&so->so_rcv, cnt)) {
					error = ENOBUFS;
					goto bad;
				}
				so->so_rcv.sb_wat = cnt;
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (cnt > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : cnt;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (cnt > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : cnt;
				break;
			}
			break;
		    }
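
		/*
		 * The SO_SNDTIMEO/SO_RCVTIMEO cases below store the timeout
		 * as a u_short tick count.  Worked example (assuming
		 * hz = 100, i.e. tick = 10000 microseconds): a timeval of
		 * 2.5 seconds becomes 2 * 100 + 500000 / 10000 = 250 ticks;
		 * the EDOM check rejects anything that would overflow
		 * USHRT_MAX ticks.
		 */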

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			u_short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec > (USHRT_MAX - tv->tv_usec / tick) / hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;
			if (val == 0 && tv->tv_usec != 0)
				val = 1;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
			if (m == NULL) {
				error = sosplice(so, -1, 0);
			} else if (m->m_len < sizeof(int)) {
				error = EINVAL;
				goto bad;
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max);
			}
			break;
#endif /* SOCKET_SPLICE */

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

int
sogetopt(struct socket *so, int level, int optname, struct mbuf **mp)
{
	struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_BINDANY:
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_JUMBO:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
		    {
			int s = splsoftnet();

			m->m_len = sizeof(off_t);
			*mtod(m, off_t *) = so->so_splicelen;
			splx(s);
			break;
		    }
#endif /* SOCKET_SPLICE */

		case SO_PEERCRED:
			if (so->so_proto->pr_protocol == AF_UNIX) {
				struct unpcb *unp = sotounpcb(so);

				if (unp->unp_flags & UNP_FEIDS) {
					*mp = m = m_get(M_WAIT, MT_SOOPTS);
					m->m_len = sizeof(unp->unp_connid);
					bcopy((caddr_t)(&(unp->unp_connid)),
					    mtod(m, caddr_t),
					    (unsigned)m->m_len);
				} else
					return (ENOTCONN);
			} else
				return (EOPNOTSUPP);
			break;

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

void
sohasoutofband(struct socket *so)
{
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct sockbuf *sb;
	int s;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}

	s = splnet();
	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	splx(s);
	return (0);
}

void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
#ifdef SOCKET_SPLICE
	if (so->so_splice)
		return (0);
#endif /* SOCKET_SPLICE */
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (so->so_qlen != 0);
}