/*	$OpenBSD: uipc_socket.c,v 1.39 2001/11/28 17:18:00 ericj Exp $	*/
/*	$NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/pool.h>

void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_solisten(struct knote *kn, long hint);

struct filterops solisten_filtops =
	{ 1, NULL, filt_sordetach, filt_solisten };
struct filterops soread_filtops =
	{ 1, NULL, filt_sordetach, filt_soread };
struct filterops sowrite_filtops =
	{ 1, NULL, filt_sowdetach, filt_sowrite };


#ifndef SOMINCONN
#define SOMINCONN 80
#endif /* SOMINCONN */

int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

struct pool socket_pool;

void
soinit(void)
{

	pool_init(&socket_pool, sizeof(struct socket), 0, 0, 0,
	    "sockpl", 0, NULL, NULL, M_SOCKET);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
/*ARGSUSED*/
int
socreate(dom, aso, type, proto)
	int dom;
	struct socket **aso;
	register int type;
	int proto;
{
	struct proc *p = curproc;		/* XXX */
	struct protosw *prp;
	struct socket *so;
	int error, s;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == 0 || prp->pr_usrreq == 0)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	s = splsoftnet();
	so = pool_get(&socket_pool, PR_WAITOK);
	bzero((caddr_t)so, sizeof(*so));
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);
	so->so_type = type;
	if (p->p_ucred->cr_uid == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_cred->p_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_proto = prp;
	error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
	    (struct mbuf *)(long)proto, NULL);
	if (error) {
		so->so_state |= SS_NOFDREF;
		sofree(so);
		splx(s);
		return (error);
	}
#ifdef COMPAT_SUNOS
	{
		extern struct emul emul_sunos;
		if (p->p_emul == &emul_sunos && type == SOCK_DGRAM)
			so->so_options |= SO_BROADCAST;
	}
#endif
	splx(s);
	*aso = so;
	return (0);
}

int
sobind(so, nam)
	struct socket *so;
	struct mbuf *nam;
{
	int s = splsoftnet();
	int error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL);
	splx(s);
	return (error);
}

int
solisten(so, backlog)
	register struct socket *so;
	int backlog;
{
	int s = splsoftnet(), error;

	error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL);
	if (error) {
		splx(s);
		return (error);
	}
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn)
		backlog = somaxconn;
	if (backlog < sominconn)
		backlog = sominconn;
	so->so_qlimit = backlog;
	splx(s);
	return (0);
}

/*
 * Must be called at splsoftnet()
 */

void
sofree(so)
	register struct socket *so;
{

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
		return;
	if (so->so_head) {
		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (!soqremque(so, 0))
			return;
	}
	sbrelease(&so->so_snd);
	sorflush(so);
	pool_put(&socket_pool, so);
}
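
/*
 * Illustrative sketch of the calling sequence the routines above
 * provide: a hypothetical in-kernel consumer setting up a listening
 * stream socket.  "example_listen", the AF_INET choice, and the
 * backlog value are assumptions for illustration only, not part of
 * this file.
 */
#if 0
static int
example_listen(struct mbuf *nam, struct socket **sop)
{
	struct socket *so;
	int error;

	if ((error = socreate(AF_INET, &so, SOCK_STREAM, 0)) != 0)
		return (error);
	/* nam carries a struct sockaddr_in built by the caller */
	if ((error = sobind(so, nam)) != 0 ||
	    (error = solisten(so, 5)) != 0) {
		/* solisten() clamps the backlog to [sominconn, somaxconn] */
		(void) soclose(so);
		return (error);
	}
	*sop = so;
	return (0);
}
#endif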

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(so)
	register struct socket *so;
{
	struct socket *so2;
	int s = splsoftnet();		/* conservative */
	int error = 0;

	if (so->so_options & SO_ACCEPTCONN) {
		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			(void) soqremque(so2, 0);
			(void) soabort(so2);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			(void) soqremque(so2, 1);
			(void) soabort(so2);
		}
	}
	if (so->so_pcb == 0)
		goto discard;
	if (so->so_state & SS_ISCONNECTED) {
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (so->so_state & SS_NBIO))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = tsleep((caddr_t)&so->so_timeo,
				    PSOCK | PCATCH, netcls,
				    so->so_linger * hz);
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2 = (*so->so_proto->pr_usrreq)(so, PRU_DETACH, NULL,
		    NULL, NULL);
		if (error == 0)
			error = error2;
	}
discard:
	if (so->so_state & SS_NOFDREF)
		panic("soclose: NOFDREF");
	so->so_state |= SS_NOFDREF;
	sofree(so);
	splx(s);
	return (error);
}

/*
 * Must be called at splsoftnet...
 */
int
soabort(so)
	struct socket *so;
{

	return (*so->so_proto->pr_usrreq)(so, PRU_ABORT, NULL, NULL, NULL);
}

int
soaccept(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s = splsoftnet();
	int error = 0;

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept: !NOFDREF");
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = (*so->so_proto->pr_usrreq)(so, PRU_ACCEPT, NULL,
		    nam, NULL);
	else
		error = ECONNABORTED;
	splx(s);
	return (error);
}

int
soconnect(so, nam)
	register struct socket *so;
	struct mbuf *nam;
{
	int s;
	int error;

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	s = splsoftnet();
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
		    NULL, nam, NULL);
	splx(s);
	return (error);
}

int
soconnect2(so1, so2)
	register struct socket *so1;
	struct socket *so2;
{
	int s = splsoftnet();
	int error;

	error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
	    (struct mbuf *)so2, NULL);
	splx(s);
	return (error);
}

int
sodisconnect(so)
	register struct socket *so;
{
	int s = splsoftnet();
	int error;

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		error = ENOTCONN;
		goto bad;
	}
	if (so->so_state & SS_ISDISCONNECTING) {
		error = EALREADY;
		goto bad;
	}
	error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT, NULL, NULL,
	    NULL);
bad:
	splx(s);
	return (error);
}

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)

/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct mbuf *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;
{
	struct proc *p = curproc;		/* XXX */
	struct mbuf **mp;
	struct mbuf *m;
	long space, len, mlen, clen = 0;
	quad_t resid;
	int error, s, dontroute;
	int atomic = sosendallatonce(so) || top;

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/*
	 * In theory resid should be unsigned (since uio->uio_resid is).
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 * MSG_EOR on a SOCK_STREAM socket is also invalid.
	 */
	if (resid < 0 ||
	    (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}
	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;
#define	snderr(errno)	{ error = errno; splx(s); goto release; }

restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	do {
		s = splsoftnet();
		if (so->so_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				    !(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == 0)
				snderr(EDESTADDRREQ);
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if ((atomic && resid > so->so_snd.sb_hiwat) ||
		    clen > so->so_snd.sb_hiwat)
			snderr(EMSGSIZE);
		if (space < resid + clen && uio &&
		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
			if (so->so_state & SS_NBIO)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(&so->so_snd);
			splx(s);
			if (error)
				goto out;
			goto restart;
		}
		splx(s);
		mp = &top;
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else do {
				if (top == 0) {
					MGETHDR(m, M_WAIT, MT_DATA);
					mlen = MHLEN;
					m->m_pkthdr.len = 0;
					m->m_pkthdr.rcvif = (struct ifnet *)0;
				} else {
					MGET(m, M_WAIT, MT_DATA);
					mlen = MLEN;
				}
				if (resid >= MINCLSIZE && space >= MCLBYTES) {
					MCLGET(m, M_WAIT);
					if ((m->m_flags & M_EXT) == 0)
						goto nopages;
					mlen = MCLBYTES;
					if (atomic && top == 0) {
						len = lmin(MCLBYTES - max_hdr, resid);
						m->m_data += max_hdr;
					} else
						len = lmin(MCLBYTES, resid);
					space -= len;
				} else {
nopages:
					len = lmin(lmin(mlen, resid), space);
					space -= len;
					/*
					 * For datagram protocols, leave room
					 * for protocol headers in first mbuf.
					 */
					if (atomic && top == 0 && len < mlen)
						MH_ALIGN(m, len);
				}
				error = uiomove(mtod(m, caddr_t), (int)len,
				    uio);
				resid = uio->uio_resid;
				m->m_len = len;
				*mp = m;
				top->m_pkthdr.len += len;
				if (error)
					goto release;
				mp = &m->m_next;
				if (resid <= 0) {
					if (flags & MSG_EOR)
						top->m_flags |= M_EOR;
					break;
				}
			} while (space > 0 && atomic);
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			s = splsoftnet();		/* XXX */
			error = (*so->so_proto->pr_usrreq)(so,
			    (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND,
			    top, addr, control);
			splx(s);
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	sbunlock(&so->so_snd);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	return (error);
}
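
/*
 * Illustrative sketch of the "top" path described above: a
 * hypothetical kernel caller handing sosend() a prepackaged mbuf
 * chain on a connected socket.  With a null uio, "top" must carry a
 * packet header and fit in the send buffer in one piece; sosend()
 * frees the chain on return.  "example_send_chain" is an assumed
 * name, not part of this file.
 */
#if 0
static int
example_send_chain(struct socket *so, struct mbuf *top)
{
	/* no destination address or control data on a connected socket */
	return (sosend(so, NULL, NULL, top, NULL, 0));
}
#endif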

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network interrupts for the entire time here,
 * we splx() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(so, paddr, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct mbuf **paddr;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	size_t orig_resid = uio->uio_resid;
	int uio_error = 0;
	int resid;

	mp = mp0;
	if (paddr)
		*paddr = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (so->so_state & SS_NBIO)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
		    (struct mbuf *)(long)(flags & MSG_PEEK), NULL);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreq)(so, PRU_RCVD, NULL, NULL, NULL);

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	s = splsoftnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
#ifdef notyet /* XXXX */
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
#endif
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * The sockbuf must be consistent here (sb_mb points to the
		 * current mbuf, whose m_nextpkt points to the next record)
		 * when we drop priority; we must note any additions to the
		 * sockbuf when we block interrupts again.
		 */
		if (mp == 0 && uio_error == 0) {
			resid = uio->uio_resid;
			splx(s);
			uio_error =
			    uiomove(mtod(m, caddr_t) + moff, (int)len,
			    uio);
			s = splsoftnet();
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreq)(so, PRU_RCVD, NULL,
			    (struct mbuf *)(long)flags, NULL);
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}
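
/*
 * Illustrative sketch of a datagram receive through soreceive(), per
 * the record layout described above: the sender address comes back
 * in *from and the data through the uio.  "example_recv_dgram" and
 * its setup are assumptions for illustration only.
 */
#if 0
static int
example_recv_dgram(struct socket *so, struct uio *uio, struct mbuf **from)
{
	int flags = 0;
	int error;

	/* no ancillary data wanted; flags returns MSG_TRUNC etc. */
	error = soreceive(so, from, uio, NULL, NULL, &flags);
	if (error == 0 && (flags & MSG_TRUNC)) {
		/* the datagram was larger than the uio; the tail was dropped */
	}
	return (error);
}
#endif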

int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;		/* map 0/1/2 (receive/send/both) onto FREAD/FWRITE bits */
	if (how & ~(FREAD|FWRITE))
		return (EINVAL);
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return (*pr->pr_usrreq)(so, PRU_SHUTDOWN, NULL, NULL, NULL);
	return (0);
}

void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	/* XXX - the bzero stomps all over so_rcv */
	if (asb.sb_flags & SB_KNOTE) {
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

int
sosetopt(so, level, optname, m0)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;
			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    cnt) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			case SO_SNDLOWAT:
				so->so_snd.sb_lowat = (cnt > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : cnt;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat = (cnt > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : cnt;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			if (tv->tv_sec * hz + tv->tv_usec / tick > SHRT_MAX) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {

			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
			    (PRCO_SETOPT, so, level, optname, &m0));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}
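
/*
 * Illustrative sketch of feeding sosetopt() from kernel code: the
 * option value travels in an mbuf that sosetopt() consumes on every
 * path.  The helper name and the five second linger value are
 * assumptions for illustration only.
 */
#if 0
static int
example_set_linger(struct socket *so)
{
	struct mbuf *m;
	struct linger *l;

	m = m_get(M_WAIT, MT_SOOPTS);
	m->m_len = sizeof(struct linger);
	l = mtod(m, struct linger *);
	l->l_onoff = 1;
	l->l_linger = 5;	/* linger for up to five seconds on close */
	return (sosetopt(so, SOL_SOCKET, SO_LINGER, m));
}
#endif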

int
sogetopt(so, level, optname, mp)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
			    (PRCO_GETOPT, so, level, optname, mp));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}
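
/*
 * Illustrative counterpart for sogetopt(): the result comes back in
 * a freshly allocated mbuf that the caller must free.  Hypothetical
 * helper, for illustration only.
 */
#if 0
static int
example_get_type(struct socket *so, int *typep)
{
	struct mbuf *m;
	int error;

	if ((error = sogetopt(so, SOL_SOCKET, SO_TYPE, &m)) != 0)
		return (error);
	*typep = *mtod(m, int *);
	(void) m_free(m);
	return (0);
}
#endif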

void
sohasoutofband(so)
	register struct socket *so;
{
	csignal(so->so_pgid, SIGURG, so->so_siguid, so->so_sigeuid);
	selwakeup(&so->so_rcv.sb_sel);
}

int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	struct sockbuf *sb;
	int s;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		if (so->so_options & SO_ACCEPTCONN)
			kn->kn_fop = &solisten_filtops;
		else
			kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	default:
		return (1);
	}

	s = splnet();
	SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
	sb->sb_flags |= SB_KNOTE;
	splx(s);
	return (0);
}

void
filt_sordetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_rcv.sb_sel.si_note))
		so->so_rcv.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_rcv.sb_cc;
	if (so->so_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_rcv.sb_lowat);
}

void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;
	int s = splnet();

	SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
	if (SLIST_EMPTY(&so->so_snd.sb_sel.si_note))
		so->so_snd.sb_flags &= ~SB_KNOTE;
	splx(s);
}

/*ARGSUSED*/
int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = sbspace(&so->so_snd);
	if (so->so_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		kn->kn_fflags = so->so_error;
		return (1);
	}
	if (so->so_error)	/* temporary udp error */
		return (1);
	if (((so->so_state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
		return (0);
	if (kn->kn_sfflags & NOTE_LOWAT)
		return (kn->kn_data >= kn->kn_sdata);
	return (kn->kn_data >= so->so_snd.sb_lowat);
}

/*ARGSUSED*/
int
filt_solisten(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_data;

	kn->kn_data = so->so_qlen;
	return (so->so_qlen != 0);
}