/*	$OpenBSD: uipc_socket.c,v 1.361 2025/01/20 16:34:48 bluhm Exp $	*/
/*	$NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/event.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/unpcb.h>
#include <sys/socketvar.h>
#include <sys/signalvar.h>
#include <sys/pool.h>
#include <sys/atomic.h>
#include <sys/rwlock.h>
#include <sys/time.h>
#include <sys/refcnt.h>

#ifdef DDB
#include <machine/db_machdep.h>
#endif

void	sbsync(struct sockbuf *, struct mbuf *);

int	sosplice(struct socket *, int, off_t, struct timeval *);
void	sounsplice(struct socket *, struct socket *, int);
void	soidle(void *);
void	sotask(void *);
int	somove(struct socket *, int);
void	sorflush(struct socket *);

void	filt_sordetach(struct knote *kn);
int	filt_soread(struct knote *kn, long hint);
void	filt_sowdetach(struct knote *kn);
int	filt_sowrite(struct knote *kn, long hint);
int	filt_soexcept(struct knote *kn, long hint);

int	filt_sowmodify(struct kevent *kev, struct knote *kn);
int	filt_sowprocess(struct knote *kn, struct kevent *kev);

int	filt_sormodify(struct kevent *kev, struct knote *kn);
int	filt_sorprocess(struct knote *kn, struct kevent *kev);

const struct filterops soread_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sordetach,
	.f_event	= filt_soread,
	.f_modify	= filt_sormodify,
	.f_process	= filt_sorprocess,
};

const struct filterops sowrite_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sowdetach,
	.f_event	= filt_sowrite,
	.f_modify	= filt_sowmodify,
	.f_process	= filt_sowprocess,
};

const struct filterops soexcept_filtops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_sordetach,
	.f_event	= filt_soexcept,
	.f_modify	= filt_sormodify,
	.f_process	= filt_sorprocess,
};

#ifndef SOMINCONN
#define SOMINCONN	80
#endif /* SOMINCONN */

int	somaxconn = SOMAXCONN;
int	sominconn = SOMINCONN;

struct pool socket_pool;
#ifdef SOCKET_SPLICE
struct pool sosplice_pool;
struct taskq *sosplice_taskq;
struct rwlock sosplice_lock = RWLOCK_INITIALIZER("sosplicelk");
#endif

void
soinit(void)
{
	pool_init(&socket_pool, sizeof(struct socket), 0, IPL_SOFTNET, 0,
	    "sockpl", NULL);
#ifdef SOCKET_SPLICE
	pool_init(&sosplice_pool, sizeof(struct sosplice), 0, IPL_SOFTNET, 0,
	    "sosppl", NULL);
#endif
}

struct socket *
soalloc(const struct protosw *prp, int wait)
{
	const struct domain *dp = prp->pr_domain;
	const char *dom_name = dp->dom_name;
	struct socket *so;

	so = pool_get(&socket_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (so == NULL)
		return (NULL);

#ifdef WITNESS
	/*
	 * XXX: Make WITNESS happy. AF_INET and AF_INET6 sockets could be
	 * spliced together.
	 */
	switch (dp->dom_family) {
	case AF_INET:
	case AF_INET6:
		dom_name = "inet46";
		break;
	}
#endif

	refcnt_init_trace(&so->so_refcnt, DT_REFCNT_IDX_SOCKET);
	rw_init_flags(&so->so_lock, dom_name, RWL_DUPOK);
	rw_init(&so->so_rcv.sb_lock, "sbufrcv");
	rw_init(&so->so_snd.sb_lock, "sbufsnd");
	mtx_init_flags(&so->so_rcv.sb_mtx, IPL_MPFLOOR, "sbrcv", 0);
	mtx_init_flags(&so->so_snd.sb_mtx, IPL_MPFLOOR, "sbsnd", 0);
	klist_init_mutex(&so->so_rcv.sb_klist, &so->so_rcv.sb_mtx);
	klist_init_mutex(&so->so_snd.sb_klist, &so->so_snd.sb_mtx);
	sigio_init(&so->so_sigio);
	TAILQ_INIT(&so->so_q0);
	TAILQ_INIT(&so->so_q);

	so->so_snd.sb_flags |= SB_MTXLOCK;
	so->so_rcv.sb_flags |= SB_MTXLOCK;

	return (so);
}

/*
 * Socket operation routines.
 * These routines are called by the routines in
 * sys_socket.c or from a system process, and
 * implement the semantics of socket operations by
 * switching out to the protocol specific routines.
 */
int
socreate(int dom, struct socket **aso, int type, int proto)
{
	struct proc *p = curproc;		/* XXX */
	const struct protosw *prp;
	struct socket *so;
	int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);
	if (prp == NULL || prp->pr_usrreqs == NULL)
		return (EPROTONOSUPPORT);
	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc(prp, M_WAIT);
	so->so_type = type;
	if (suser(p) == 0)
		so->so_state = SS_PRIV;
	so->so_ruid = p->p_ucred->cr_ruid;
	so->so_euid = p->p_ucred->cr_uid;
	so->so_rgid = p->p_ucred->cr_rgid;
	so->so_egid = p->p_ucred->cr_gid;
	so->so_cpid = p->p_p->ps_pid;
	so->so_proto = prp;
	so->so_snd.sb_timeo_nsecs = INFSLP;
	so->so_rcv.sb_timeo_nsecs = INFSLP;

	solock(so);
	error = pru_attach(so, proto, M_WAIT);
	if (error) {
		so->so_state |= SS_NOFDREF;
		/* sofree() calls sounlock(). */
		sofree(so, 0);
		return (error);
	}
	sounlock(so);
	*aso = so;
	return (0);
}

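/*
 * Sketch of the usual call sequence from the syscall layer.
 * socreate() takes and releases the socket lock itself, while
 * sobind() and solisten() below only assert it, so a caller does
 * roughly:
 *
 *	socreate(AF_INET, &so, SOCK_STREAM, 0);
 *	solock(so);
 *	error = sobind(so, nam, p);
 *	error = solisten(so, backlog);
 *	sounlock(so);
 */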
int
sobind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	soassertlocked(so);
	return pru_bind(so, nam, p);
}

int
solisten(struct socket *so, int backlog)
{
	int somaxconn_local = atomic_load_int(&somaxconn);
	int sominconn_local = atomic_load_int(&sominconn);
	int error;

	switch (so->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		break;
	default:
		return (EOPNOTSUPP);
	}

	soassertlocked(so);

	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING|SS_ISDISCONNECTING))
		return (EINVAL);
#ifdef SOCKET_SPLICE
	if (isspliced(so) || issplicedback(so))
		return (EOPNOTSUPP);
#endif /* SOCKET_SPLICE */
	error = pru_listen(so);
	if (error)
		return (error);
	if (TAILQ_FIRST(&so->so_q) == NULL)
		so->so_options |= SO_ACCEPTCONN;
	if (backlog < 0 || backlog > somaxconn_local)
		backlog = somaxconn_local;
	if (backlog < sominconn_local)
		backlog = sominconn_local;
	so->so_qlimit = backlog;
	return (0);
}

void
sorele(struct socket *so)
{
	if (refcnt_rele(&so->so_refcnt) == 0)
		return;

	sigio_free(&so->so_sigio);
	klist_free(&so->so_rcv.sb_klist);
	klist_free(&so->so_snd.sb_klist);

	mtx_enter(&so->so_snd.sb_mtx);
	sbrelease(so, &so->so_snd);
	mtx_leave(&so->so_snd.sb_mtx);

	if (so->so_proto->pr_flags & PR_RIGHTS &&
	    so->so_proto->pr_domain->dom_dispose)
		(*so->so_proto->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
	m_purge(so->so_rcv.sb_mb);

#ifdef SOCKET_SPLICE
	if (so->so_sp)
		pool_put(&sosplice_pool, so->so_sp);
#endif
	pool_put(&socket_pool, so);
}

#define SOSP_FREEING_READ	1
#define SOSP_FREEING_WRITE	2
void
sofree(struct socket *so, int keep_lock)
{
	int persocket = solock_persocket(so);

	soassertlocked(so);

	if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
		if (!keep_lock)
			sounlock(so);
		return;
	}
	if (so->so_head) {
		struct socket *head = so->so_head;

		/*
		 * We must not decommission a socket that's on the accept(2)
		 * queue.  If we do, then accept(2) may hang after select(2)
		 * indicated that the listening socket was ready.
		 */
		if (so->so_onq == &head->so_q) {
			if (!keep_lock)
				sounlock(so);
			return;
		}

		if (persocket) {
			soref(head);
			sounlock(so);
			solock(head);
			solock(so);

			if (so->so_onq != &head->so_q0) {
				sounlock(so);
				sounlock(head);
				sorele(head);
				return;
			}
		}

		soqremque(so, 0);

		if (persocket) {
			sounlock(head);
			sorele(head);
		}
	}

	if (!keep_lock)
		sounlock(so);
	sorele(so);
}

static inline uint64_t
solinger_nsec(struct socket *so)
{
	if (so->so_linger == 0)
		return INFSLP;

	return SEC_TO_NSEC(so->so_linger);
}

/*
 * Close a socket on last file table reference removal.
 * Initiate disconnect if connected.
 * Free socket when disconnect complete.
 */
int
soclose(struct socket *so, int flags)
{
	struct socket *so2;
	int error = 0;

	solock(so);
	/* Revoke async IO early. There is a final revocation in sofree(). */
	sigio_free(&so->so_sigio);
	if (so->so_state & SS_ISCONNECTED) {
		if (so->so_pcb == NULL)
			goto discard;
		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
			error = sodisconnect(so);
			if (error)
				goto drop;
		}
		if (so->so_options & SO_LINGER) {
			if ((so->so_state & SS_ISDISCONNECTING) &&
			    (flags & MSG_DONTWAIT))
				goto drop;
			while (so->so_state & SS_ISCONNECTED) {
				error = sosleep_nsec(so, &so->so_timeo,
				    PSOCK | PCATCH, "netcls",
				    solinger_nsec(so));
				if (error)
					break;
			}
		}
	}
drop:
	if (so->so_pcb) {
		int error2;
		error2 = pru_detach(so);
		if (error == 0)
			error = error2;
	}
	if (so->so_options & SO_ACCEPTCONN) {
		int persocket = solock_persocket(so);

		while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
			soref(so2);
			solock(so2);
			(void) soqremque(so2, 0);
			sounlock(so);
			soabort(so2);
			sounlock(so2);
			sorele(so2);
			solock(so);
		}
		while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
			soref(so2);
			solock_nonet(so2);
			(void) soqremque(so2, 1);
			if (persocket)
				sounlock(so);
			soabort(so2);
			sounlock_nonet(so2);
			sorele(so2);
			if (persocket)
				solock(so);
		}
	}
discard:
#ifdef SOCKET_SPLICE
	if (so->so_sp) {
		struct socket *soback;

		sounlock(so);
		mtx_enter(&so->so_snd.sb_mtx);
		/*
		 * A concurrent sounsplice() locks the `sb_mtx' mutexes on
		 * both `so_snd' and `so_rcv' before unsplicing the sockets.
		 */
		if ((soback = so->so_sp->ssp_soback) == NULL) {
			mtx_leave(&so->so_snd.sb_mtx);
			goto notsplicedback;
		}
		soref(soback);
		mtx_leave(&so->so_snd.sb_mtx);

		/*
		 * `so' can only be unspliced, and never spliced again.
		 * Thus if the issplicedback(so) check is positive, the
		 * socket is still spliced and `ssp_soback' points to the
		 * same socket as `soback'.
		 */
		sblock(&soback->so_rcv, SBL_WAIT | SBL_NOINTR);
		if (issplicedback(so)) {
			int freeing = SOSP_FREEING_WRITE;

			if (so->so_sp->ssp_soback == so)
				freeing |= SOSP_FREEING_READ;
			sounsplice(so->so_sp->ssp_soback, so, freeing);
		}
		sbunlock(&soback->so_rcv);
		sorele(soback);

notsplicedback:
		sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
		if (isspliced(so)) {
			struct socket *sosp;
			int freeing = SOSP_FREEING_READ;

			if (so == so->so_sp->ssp_socket)
				freeing |= SOSP_FREEING_WRITE;
			sosp = soref(so->so_sp->ssp_socket);
			sounsplice(so, so->so_sp->ssp_socket, freeing);
			sorele(sosp);
		}
		sbunlock(&so->so_rcv);

		timeout_del_barrier(&so->so_sp->ssp_idleto);
		task_del(sosplice_taskq, &so->so_sp->ssp_task);
		taskq_barrier(sosplice_taskq);

		solock(so);
	}
#endif /* SOCKET_SPLICE */

	if (so->so_state & SS_NOFDREF)
		panic("soclose NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state |= SS_NOFDREF;

	/* sofree() calls sounlock(). */
	sofree(so, 0);
	return (error);
}

void
soabort(struct socket *so)
{
	soassertlocked(so);
	pru_abort(so);
}

int
soaccept(struct socket *so, struct mbuf *nam)
{
	int error = 0;

	soassertlocked(so);

	if ((so->so_state & SS_NOFDREF) == 0)
		panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
	so->so_state &= ~SS_NOFDREF;
	if ((so->so_state & SS_ISDISCONNECTED) == 0 ||
	    (so->so_proto->pr_flags & PR_ABRTACPTDIS) == 0)
		error = pru_accept(so, nam);
	else
		error = ECONNABORTED;
	return (error);
}

int
soconnect(struct socket *so, struct mbuf *nam)
{
	int error;

	soassertlocked(so);

	if (so->so_options & SO_ACCEPTCONN)
		return (EOPNOTSUPP);
	/*
	 * If protocol is connection-based, can only connect once.
	 * Otherwise, if connected, try to disconnect first.
	 * This allows user to disconnect by connecting to, e.g.,
	 * a null address.
	 */
	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
	    (error = sodisconnect(so))))
		error = EISCONN;
	else
		error = pru_connect(so, nam);
	return (error);
}

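/*
 * soconnect2() connects two sockets to each other directly and is
 * used, e.g., by socketpair(2).  With per-socket locks both sockets
 * are locked as a pair; otherwise the shared lock covers both.
 */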
int
soconnect2(struct socket *so1, struct socket *so2)
{
	int persocket, error;

	if ((persocket = solock_persocket(so1)))
		solock_pair(so1, so2);
	else
		solock(so1);

	error = pru_connect2(so1, so2);

	if (persocket)
		sounlock(so2);
	sounlock(so1);
	return (error);
}

int
sodisconnect(struct socket *so)
{
	int error;

	soassertlocked(so);

	if ((so->so_state & SS_ISCONNECTED) == 0)
		return (ENOTCONN);
	if (so->so_state & SS_ISDISCONNECTING)
		return (EALREADY);
	error = pru_disconnect(so);
	return (error);
}

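/*
 * Note on the locking in sosend() and soreceive() below: sblock()
 * first serializes concurrent senders or receivers, `sb_mtx' then
 * protects the buffer fields themselves, and the shared socket lock
 * is held across the whole operation when the buffer is not
 * SB_MTXLOCK protected, or only around the protocol calls when it
 * is.  All locks except the sblock() are dropped while copying data
 * in and out of user space.
 */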
int m_getuio(struct mbuf **, int, long, struct uio *);

#define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not).  Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 */
int
sosend(struct socket *so, struct mbuf *addr, struct uio *uio, struct mbuf *top,
    struct mbuf *control, int flags)
{
	long space, clen = 0;
	size_t resid;
	int error;
	int atomic = sosendallatonce(so) || top;
	int dosolock = ((so->so_snd.sb_flags & SB_MTXLOCK) == 0);

	if (uio)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/* MSG_EOR on a SOCK_STREAM socket is invalid. */
	if (so->so_type == SOCK_STREAM && (flags & MSG_EOR)) {
		m_freem(top);
		m_freem(control);
		return (EINVAL);
	}
	if (uio && uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgsnd++;
	if (control) {
		/*
		 * In theory clen should be unsigned (since control->m_len is).
		 * However, space must be signed, as it might be less than 0
		 * if we over-committed, and we must use a signed comparison
		 * of space and clen.
		 */
		clen = control->m_len;
		/* reserve extra space for AF_UNIX's internalize */
		if (so->so_proto->pr_domain->dom_family == AF_UNIX &&
		    clen >= CMSG_ALIGN(sizeof(struct cmsghdr)) &&
		    mtod(control, struct cmsghdr *)->cmsg_type == SCM_RIGHTS)
			clen = CMSG_SPACE(
			    (clen - CMSG_ALIGN(sizeof(struct cmsghdr))) *
			    (sizeof(struct fdpass) / sizeof(int)));
	}

#define	snderr(errno)	{ error = errno; goto release; }

restart:
	if ((error = sblock(&so->so_snd, SBLOCKWAIT(flags))) != 0)
		goto out;
	if (dosolock)
		solock_shared(so);
	sb_mtx_lock(&so->so_snd);
	so->so_snd.sb_state |= SS_ISSENDING;
	do {
		if (so->so_snd.sb_state & SS_CANTSENDMORE)
			snderr(EPIPE);
		if ((error = READ_ONCE(so->so_error))) {
			so->so_error = 0;
			snderr(error);
		}
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
				if (!(resid == 0 && clen != 0))
					snderr(ENOTCONN);
			} else if (addr == NULL)
				snderr(EDESTADDRREQ);
		}
		space = sbspace_locked(so, &so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if (so->so_proto->pr_domain->dom_family == AF_UNIX) {
			if (atomic && resid > so->so_snd.sb_hiwat)
				snderr(EMSGSIZE);
		} else {
			if (clen > so->so_snd.sb_hiwat ||
			    (atomic && resid > so->so_snd.sb_hiwat - clen))
				snderr(EMSGSIZE);
		}
		if (space < clen ||
		    (space - clen < resid &&
		    (atomic || space < so->so_snd.sb_lowat))) {
			if (flags & MSG_DONTWAIT)
				snderr(EWOULDBLOCK);
			sbunlock(&so->so_snd);
			error = sbwait(so, &so->so_snd);
			so->so_snd.sb_state &= ~SS_ISSENDING;
			sb_mtx_unlock(&so->so_snd);
			if (dosolock)
				sounlock_shared(so);
			if (error)
				goto out;
			goto restart;
		}
		space -= clen;
		do {
			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				sb_mtx_unlock(&so->so_snd);
				if (dosolock)
					sounlock_shared(so);
				error = m_getuio(&top, atomic, space, uio);
				if (dosolock)
					solock_shared(so);
				sb_mtx_lock(&so->so_snd);
				if (error)
					goto release;
				space -= top->m_pkthdr.len;
				resid = uio->uio_resid;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			}
			if (resid == 0)
				so->so_snd.sb_state &= ~SS_ISSENDING;
			if (top && so->so_options & SO_ZEROIZE)
				top->m_flags |= M_ZEROIZE;
			sb_mtx_unlock(&so->so_snd);
			if (!dosolock)
				solock_shared(so);
			if (flags & MSG_OOB)
				error = pru_sendoob(so, top, addr, control);
			else
				error = pru_send(so, top, addr, control);
			if (!dosolock)
				sounlock_shared(so);
			sb_mtx_lock(&so->so_snd);
			clen = 0;
			control = NULL;
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	so->so_snd.sb_state &= ~SS_ISSENDING;
	sb_mtx_unlock(&so->so_snd);
	if (dosolock)
		sounlock_shared(so);
	sbunlock(&so->so_snd);
out:
	m_freem(top);
	m_freem(control);
	return (error);
}

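/*
 * m_getuio() fills a fresh mbuf chain with at most `space' bytes
 * taken from the uio, preferring clusters for larger copies and
 * leaving room for protocol headers in the first mbuf of atomic
 * (datagram) sends.
 */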
int
m_getuio(struct mbuf **mp, int atomic, long space, struct uio *uio)
{
	struct mbuf *m, *top = NULL;
	struct mbuf **nextp = &top;
	u_long len, mlen;
	size_t resid = uio->uio_resid;
	int error;

	do {
		if (top == NULL) {
			MGETHDR(m, M_WAIT, MT_DATA);
			mlen = MHLEN;
		} else {
			MGET(m, M_WAIT, MT_DATA);
			mlen = MLEN;
		}
		/* chain mbuf together */
		*nextp = m;
		nextp = &m->m_next;

		resid = ulmin(resid, space);
		if (resid >= MINCLSIZE) {
			MCLGETL(m, M_NOWAIT, ulmin(resid, MAXMCLBYTES));
			if ((m->m_flags & M_EXT) == 0)
				MCLGETL(m, M_NOWAIT, MCLBYTES);
			if ((m->m_flags & M_EXT) == 0)
				goto nopages;
			mlen = m->m_ext.ext_size;
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && m == top && len < mlen - max_hdr)
				m->m_data += max_hdr;
		} else {
nopages:
			len = ulmin(mlen, resid);
			/*
			 * For datagram protocols, leave room
			 * for protocol headers in first mbuf.
			 */
			if (atomic && m == top && len < mlen - max_hdr)
				m_align(m, len);
		}

		error = uiomove(mtod(m, caddr_t), len, uio);
		if (error) {
			m_freem(top);
			return (error);
		}

		/* adjust counters */
		resid = uio->uio_resid;
		space -= len;
		m->m_len = len;
		top->m_pkthdr.len += len;

		/* Is there more space and more data? */
	} while (space > 0 && resid > 0);

	*mp = top;
	return 0;
}

/*
 * Following replacement or removal of the first mbuf on the first
 * mbuf chain of a socket buffer, push necessary state changes back
 * into the socket buffer so that other consumers see the values
 * consistently.  'nextrecord' is the caller's locally stored value of
 * the original value of sb->sb_mb->m_nextpkt which must be restored
 * when the lead mbuf changes.  NOTE: 'nextrecord' may be NULL.
 */
void
sbsync(struct sockbuf *sb, struct mbuf *nextrecord)
{

	/*
	 * First, update for the new value of nextrecord.  If necessary,
	 * make it the first record.
	 */
	if (sb->sb_mb != NULL)
		sb->sb_mb->m_nextpkt = nextrecord;
	else
		sb->sb_mb = nextrecord;

	/*
	 * Now update any dependent socket buffer fields to reflect
	 * the new state.  This is an inline of SB_EMPTY_FIXUP, with
	 * the addition of a second clause that takes care of the
	 * case where sb_mb has been updated, but remains the last
	 * record.
	 */
	if (sb->sb_mb == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (sb->sb_mb->m_nextpkt == NULL)
		sb->sb_lastrecord = sb->sb_mb;
}

/*
 * Implement receive operations on a socket.
 * We depend on the way that records are added to the sockbuf
 * by sbappend*.  In particular, each record (mbufs linked through m_next)
 * must begin with an address if the protocol so specifies,
 * followed by an optional mbuf or mbufs containing ancillary data,
 * and then zero or more mbufs of data.
 * In order to avoid blocking network for the entire time here, we release
 * the solock() while doing the actual copy to user space.
 * Although the sockbuf is locked, new data may still be appended,
 * and thus we must maintain consistency of the sockbuf during that time.
 *
 * The caller may receive the data as a single mbuf chain by supplying
 * an mbuf **mp0 for use in returning the chain.  The uio is then used
 * only for the count in uio_resid.
 */
int
soreceive(struct socket *so, struct mbuf **paddr, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp,
    socklen_t controllen)
{
	struct mbuf *m, **mp;
	struct mbuf *cm;
	u_long len, offset, moff;
	int flags, error, error2, type, uio_error = 0;
	const struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	size_t resid, orig_resid = uio->uio_resid;
	int dosolock = ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0);

	mp = mp0;
	if (paddr)
		*paddr = NULL;
	if (controlp)
		*controlp = NULL;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		solock_shared(so);
		error = pru_rcvoob(so, m, flags & MSG_PEEK);
		sounlock_shared(so);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    ulmin(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		m_freem(m);
		return (error);
	}
	if (mp)
		*mp = NULL;

restart:
	if ((error = sblock(&so->so_rcv, SBLOCKWAIT(flags))) != 0)
		return (error);
	if (dosolock)
		solock_shared(so);
	sb_mtx_lock(&so->so_rcv);

	m = so->so_rcv.sb_mb;
#ifdef SOCKET_SPLICE
	if (isspliced(so))
		m = NULL;
#endif /* SOCKET_SPLICE */
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark,
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat), or
	 *   3. MSG_DONTWAIT is not set.
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == NULL && so->so_rcv.sb_cc)
#ifdef SOCKET_SPLICE
		    if (!isspliced(so))
#endif /* SOCKET_SPLICE */
			panic("receive 1: so %p, so_type %d, sb_cc %lu",
			    so, so->so_type, so->so_rcv.sb_cc);
#endif
		if ((error2 = READ_ONCE(so->so_error))) {
			if (m)
				goto dontblock;
			error = error2;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else if (so->so_rcv.sb_cc == 0)
				goto release;
		}
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0 && controlp == NULL)
			goto release;
		if (flags & MSG_DONTWAIT) {
			error = EWOULDBLOCK;
			goto release;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");

		sbunlock(&so->so_rcv);
		error = sbwait(so, &so->so_rcv);
		sb_mtx_unlock(&so->so_rcv);
		if (dosolock)
			sounlock_shared(so);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	/*
	 * On entry here, m points to the first record of the socket buffer.
	 * From this point onward, we maintain 'nextrecord' as a cache of the
	 * pointer to the next record in the socket buffer.  We must keep the
	 * various socket buffer pointers and local stack versions of the
	 * pointers in sync, pushing out modifications before operations that
	 * may sleep, and re-reading them afterwards.
	 *
	 * Otherwise, we will race with the network stack appending new data
	 * or records onto the socket buffer by using inconsistent/stale
	 * versions of the field, possibly resulting in socket buffer
	 * corruption.
	 */
	if (uio->uio_procp)
		uio->uio_procp->p_ru.ru_msgrcv++;
	KASSERT(m == so->so_rcv.sb_mb);
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 1");
	nextrecord = m->m_nextpkt;
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		orig_resid = 0;
		if (flags & MSG_PEEK) {
			if (paddr)
				*paddr = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(so, &so->so_rcv, m);
			if (paddr) {
				*paddr = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = NULL;
				m = so->so_rcv.sb_mb;
			} else {
				so->so_rcv.sb_mb = m_free(m);
				m = so->so_rcv.sb_mb;
			}
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	while (m && m->m_type == MT_CONTROL && error == 0) {
		int skip = 0;
		if (flags & MSG_PEEK) {
			if (mtod(m, struct cmsghdr *)->cmsg_type ==
			    SCM_RIGHTS) {
				/* don't leak internalized SCM_RIGHTS msgs */
				skip = 1;
			} else if (controlp)
				*controlp = m_copym(m, 0, m->m_len, M_NOWAIT);
			m = m->m_next;
		} else {
			sbfree(so, &so->so_rcv, m);
			so->so_rcv.sb_mb = m->m_next;
			m->m_nextpkt = m->m_next = NULL;
			cm = m;
			m = so->so_rcv.sb_mb;
			sbsync(&so->so_rcv, nextrecord);
			if (controlp) {
				if (pr->pr_domain->dom_externalize) {
					sb_mtx_unlock(&so->so_rcv);
					if (dosolock)
						sounlock_shared(so);
					error =
					    (*pr->pr_domain->dom_externalize)
					    (cm, controllen, flags);
					if (dosolock)
						solock_shared(so);
					sb_mtx_lock(&so->so_rcv);
				}
				*controlp = cm;
			} else {
				/*
				 * Dispose of any SCM_RIGHTS message that went
				 * through the read path rather than recv.
				 */
				if (pr->pr_domain->dom_dispose) {
					sb_mtx_unlock(&so->so_rcv);
					pr->pr_domain->dom_dispose(cm);
					sb_mtx_lock(&so->so_rcv);
				}
				m_free(cm);
			}
		}
		if (m != NULL)
			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
		else
			nextrecord = so->so_rcv.sb_mb;
		if (controlp && !skip)
			controlp = &(*controlp)->m_next;
		orig_resid = 0;
	}

	/* If m is non-NULL, we have some data to read. */
	if (m) {
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
		if (m->m_flags & M_BCAST)
			flags |= MSG_BCAST;
		if (m->m_flags & M_MCAST)
			flags |= MSG_MCAST;
	}
	SBLASTRECORDCHK(&so->so_rcv, "soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "soreceive 2");

	moff = 0;
	offset = 0;
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA) {
			break;
		} else if (m->m_type == MT_CONTROL) {
			/*
			 * If there is more than one control message in the
			 * stream, we do a short read.  Next can be received
			 * or disposed by another system call.
			 */
			break;
#ifdef DIAGNOSTIC
		} else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) {
			panic("receive 3: so %p, so_type %d, m %p, m_type %d",
			    so, so->so_type, m, m->m_type);
#endif
		}
		so->so_rcv.sb_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == NULL && uio_error == 0) {
			SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
			resid = uio->uio_resid;
			sb_mtx_unlock(&so->so_rcv);
			if (dosolock)
				sounlock_shared(so);
			uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
			if (dosolock)
				solock_shared(so);
			sb_mtx_lock(&so->so_rcv);
			if (uio_error)
				uio->uio_resid = resid - len;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
				orig_resid = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(so, &so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = NULL;
				} else {
					so->so_rcv.sb_mb = m_free(m);
					m = so->so_rcv.sb_mb;
				}
				/*
				 * If m != NULL, we also know that
				 * so->so_rcv.sb_mb != NULL.
				 */
				KASSERT(so->so_rcv.sb_mb == m);
				if (m) {
					m->m_nextpkt = nextrecord;
					if (nextrecord == NULL)
						so->so_rcv.sb_lastrecord = m;
				} else {
					so->so_rcv.sb_mb = nextrecord;
					SB_EMPTY_FIXUP(&so->so_rcv);
				}
				SBLASTRECORDCHK(&so->so_rcv, "soreceive 3");
				SBLASTMBUFCHK(&so->so_rcv, "soreceive 3");
			}
		} else {
			if (flags & MSG_PEEK) {
				moff += len;
				orig_resid = 0;
			} else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
				so->so_rcv.sb_datacc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_rcv.sb_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_rcv.sb_state & SS_CANTRCVMORE ||
			    so->so_error)
				break;
			SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 2");
			SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 2");
			if (sbwait(so, &so->so_rcv)) {
				sb_mtx_unlock(&so->so_rcv);
				if (dosolock)
					sounlock_shared(so);
				sbunlock(&so->so_rcv);
				return (0);
			}
			if ((m = so->so_rcv.sb_mb) != NULL)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(so, &so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == NULL) {
			/*
			 * First part is an inline SB_EMPTY_FIXUP().  Second
			 * part makes sure sb_lastrecord is up-to-date if
			 * there is still data in the socket buffer.
			 */
			so->so_rcv.sb_mb = nextrecord;
			if (so->so_rcv.sb_mb == NULL) {
				so->so_rcv.sb_mbtail = NULL;
				so->so_rcv.sb_lastrecord = NULL;
			} else if (nextrecord->m_nextpkt == NULL)
				so->so_rcv.sb_lastrecord = nextrecord;
		}
		SBLASTRECORDCHK(&so->so_rcv, "soreceive 4");
		SBLASTMBUFCHK(&so->so_rcv, "soreceive 4");
		if (pr->pr_flags & PR_WANTRCVD) {
			sb_mtx_unlock(&so->so_rcv);
			if (!dosolock)
				solock_shared(so);
			pru_rcvd(so);
			if (!dosolock)
				sounlock_shared(so);
			sb_mtx_lock(&so->so_rcv);
		}
	}
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 &&
	    (so->so_rcv.sb_state & SS_CANTRCVMORE) == 0) {
		sb_mtx_unlock(&so->so_rcv);
		sbunlock(&so->so_rcv);
		goto restart;
	}

	if (uio_error)
		error = uio_error;

	if (flagsp)
		*flagsp |= flags;
release:
	sb_mtx_unlock(&so->so_rcv);
	if (dosolock)
		sounlock_shared(so);
	sbunlock(&so->so_rcv);
	return (error);
}

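/*
 * soshutdown() implements shutdown(2): SHUT_RD flushes the receive
 * buffer and marks it as unable to receive more, SHUT_WR hands the
 * request to the protocol, and SHUT_RDWR does both.
 */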
int
soshutdown(struct socket *so, int how)
{
	int error = 0;

	switch (how) {
	case SHUT_RD:
		sorflush(so);
		break;
	case SHUT_RDWR:
		sorflush(so);
		/* FALLTHROUGH */
	case SHUT_WR:
		solock(so);
		error = pru_shutdown(so);
		sounlock(so);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

void
sorflush(struct socket *so)
{
	struct sockbuf *sb = &so->so_rcv;
	struct mbuf *m;
	const struct protosw *pr = so->so_proto;
	int error;

	error = sblock(sb, SBL_WAIT | SBL_NOINTR);
	/* with SBL_WAIT and SLB_NOINTR sblock() must not fail */
	KASSERT(error == 0);

	solock_shared(so);
	socantrcvmore(so);
	mtx_enter(&sb->sb_mtx);
	m = sb->sb_mb;
	memset(&sb->sb_startzero, 0,
	    (caddr_t)&sb->sb_endzero - (caddr_t)&sb->sb_startzero);
	sb->sb_timeo_nsecs = INFSLP;
	mtx_leave(&sb->sb_mtx);
	sounlock_shared(so);
	sbunlock(sb);

	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(m);
	m_purge(m);
}

#ifdef SOCKET_SPLICE

#define so_splicelen	so_sp->ssp_len
#define so_splicemax	so_sp->ssp_max
#define so_idletv	so_sp->ssp_idletv
#define so_idleto	so_sp->ssp_idleto
#define so_splicetask	so_sp->ssp_task

void
sosplice_solock_pair(struct socket *so1, struct socket *so2)
{
	NET_LOCK_SHARED();

	if (so1 == so2)
		rw_enter_write(&so1->so_lock);
	else if (so1 < so2) {
		rw_enter_write(&so1->so_lock);
		rw_enter_write(&so2->so_lock);
	} else {
		rw_enter_write(&so2->so_lock);
		rw_enter_write(&so1->so_lock);
	}
}

void
sosplice_sounlock_pair(struct socket *so1, struct socket *so2)
{
	if (so1 == so2)
		rw_exit_write(&so1->so_lock);
	else if (so1 < so2) {
		rw_exit_write(&so2->so_lock);
		rw_exit_write(&so1->so_lock);
	} else {
		rw_exit_write(&so1->so_lock);
		rw_exit_write(&so2->so_lock);
	}

	NET_UNLOCK_SHARED();
}

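/*
 * Splicing is driven from userland through setsockopt(2), handled
 * in sosetopt() below.  A minimal sketch of the usual usage:
 *
 *	struct splice sp = { .sp_fd = drain_fd };
 *	setsockopt(source_fd, SOL_SOCKET, SO_SPLICE, &sp, sizeof(sp));
 *
 * sp_max bounds the number of bytes moved and sp_idle sets an idle
 * timeout; a negative sp_fd dissolves an existing splice.
 */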
int
sosplice(struct socket *so, int fd, off_t max, struct timeval *tv)
{
	struct file *fp;
	struct socket *sosp;
	struct taskq *tq;
	int error = 0;

	if ((so->so_proto->pr_flags & PR_SPLICE) == 0)
		return (EPROTONOSUPPORT);
	if (max && max < 0)
		return (EINVAL);
	if (tv && (tv->tv_sec < 0 || !timerisvalid(tv)))
		return (EINVAL);

	/* If no fd is given, unsplice by removing existing link. */
	if (fd < 0) {
		if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
			return (error);
		if (so->so_sp && so->so_sp->ssp_socket) {
			sosp = soref(so->so_sp->ssp_socket);
			sounsplice(so, so->so_sp->ssp_socket, 0);
			sorele(sosp);
		} else
			error = EPROTO;
		sbunlock(&so->so_rcv);
		return (error);
	}

	if (sosplice_taskq == NULL) {
		rw_enter_write(&sosplice_lock);
		if (sosplice_taskq == NULL) {
			tq = taskq_create("sosplice", 1, IPL_SOFTNET,
			    TASKQ_MPSAFE);
			if (tq == NULL) {
				rw_exit_write(&sosplice_lock);
				return (ENOMEM);
			}
			/* Ensure the taskq is fully visible to other CPUs. */
			membar_producer();
			sosplice_taskq = tq;
		}
		rw_exit_write(&sosplice_lock);
	} else {
		/* Ensure the taskq is fully visible on this CPU. */
		membar_consumer();
	}

	/* Find sosp, the drain socket where data will be spliced into. */
	if ((error = getsock(curproc, fd, &fp)) != 0)
		return (error);
	sosp = fp->f_data;

	if (sosp->so_proto->pr_usrreqs->pru_send !=
	    so->so_proto->pr_usrreqs->pru_send) {
		error = EPROTONOSUPPORT;
		goto frele;
	}

	if ((error = sblock(&so->so_rcv, SBL_WAIT)) != 0)
		goto frele;
	if ((error = sblock(&sosp->so_snd, SBL_WAIT)) != 0) {
		sbunlock(&so->so_rcv);
		goto frele;
	}
	sosplice_solock_pair(so, sosp);

	if ((so->so_options & SO_ACCEPTCONN) ||
	    (sosp->so_options & SO_ACCEPTCONN)) {
		error = EOPNOTSUPP;
		goto release;
	}
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		error = ENOTCONN;
		goto release;
	}
	if ((sosp->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) {
		error = ENOTCONN;
		goto release;
	}
	if (so->so_sp == NULL) {
		struct sosplice *so_sp;

		so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
		timeout_set_flags(&so_sp->ssp_idleto, soidle, so,
		    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
		task_set(&so_sp->ssp_task, sotask, so);

		so->so_sp = so_sp;
	}
	if (sosp->so_sp == NULL) {
		struct sosplice *so_sp;

		so_sp = pool_get(&sosplice_pool, PR_WAITOK | PR_ZERO);
		timeout_set_flags(&so_sp->ssp_idleto, soidle, sosp,
		    KCLOCK_NONE, TIMEOUT_PROC | TIMEOUT_MPSAFE);
		task_set(&so_sp->ssp_task, sotask, sosp);

		sosp->so_sp = so_sp;
	}
	if (so->so_sp->ssp_socket || sosp->so_sp->ssp_soback) {
		error = EBUSY;
		goto release;
	}

	so->so_splicelen = 0;
	so->so_splicemax = max;
	if (tv)
		so->so_idletv = *tv;
	else
		timerclear(&so->so_idletv);

	/*
	 * To prevent sorwakeup() calling somove() before this somove()
	 * has finished, the socket buffers are not marked as spliced yet.
	 */

	/* Splice so and sosp together. */
	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);
	so->so_sp->ssp_socket = sosp;
	sosp->so_sp->ssp_soback = so;
	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	sosplice_sounlock_pair(so, sosp);
	sbunlock(&sosp->so_snd);

	if (somove(so, M_WAIT)) {
		mtx_enter(&so->so_rcv.sb_mtx);
		mtx_enter(&sosp->so_snd.sb_mtx);
		so->so_rcv.sb_flags |= SB_SPLICE;
		sosp->so_snd.sb_flags |= SB_SPLICE;
		mtx_leave(&sosp->so_snd.sb_mtx);
		mtx_leave(&so->so_rcv.sb_mtx);
	}

	sbunlock(&so->so_rcv);
	FRELE(fp, curproc);
	return (0);

 release:
	sosplice_sounlock_pair(so, sosp);
	sbunlock(&sosp->so_snd);
	sbunlock(&so->so_rcv);
 frele:
	FRELE(fp, curproc);
	return (error);
}

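/*
 * sounsplice() expects the source's `so_rcv' buffer lock to be held
 * and, as the soref()/sorele() pairs at the call sites show, the
 * caller must hold a reference on the drain socket `sosp'.
 */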
void
sounsplice(struct socket *so, struct socket *sosp, int freeing)
{
	sbassertlocked(&so->so_rcv);

	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);
	so->so_rcv.sb_flags &= ~SB_SPLICE;
	sosp->so_snd.sb_flags &= ~SB_SPLICE;
	so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	task_del(sosplice_taskq, &so->so_splicetask);
	timeout_del(&so->so_idleto);

	/* Do not wakeup a socket that is about to be freed. */
	if ((freeing & SOSP_FREEING_READ) == 0) {
		int readable;

		solock_shared(so);
		mtx_enter(&so->so_rcv.sb_mtx);
		readable = soreadable(so);
		mtx_leave(&so->so_rcv.sb_mtx);
		if (readable)
			sorwakeup(so);
		sounlock_shared(so);
	}
	if ((freeing & SOSP_FREEING_WRITE) == 0) {
		solock_shared(sosp);
		if (sowriteable(sosp))
			sowwakeup(sosp);
		sounlock_shared(sosp);
	}
}

void
soidle(void *arg)
{
	struct socket *so = arg;

	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
	if (so->so_rcv.sb_flags & SB_SPLICE) {
		struct socket *sosp;

		WRITE_ONCE(so->so_error, ETIMEDOUT);
		sosp = soref(so->so_sp->ssp_socket);
		sounsplice(so, so->so_sp->ssp_socket, 0);
		sorele(sosp);
	}
	sbunlock(&so->so_rcv);
}

void
sotask(void *arg)
{
	struct socket *so = arg;
	int doyield = 0;

	sblock(&so->so_rcv, SBL_WAIT | SBL_NOINTR);
	if (so->so_rcv.sb_flags & SB_SPLICE) {
		if (so->so_proto->pr_flags & PR_WANTRCVD)
			doyield = 1;
		somove(so, M_DONTWAIT);
	}
	sbunlock(&so->so_rcv);

	if (doyield) {
		/* Avoid user land starvation. */
		yield();
	}
}

/*
 * Move data from the receive buffer of the spliced source socket to
 * the send buffer of the drain socket.  Try to move as much as
 * possible in one big chunk.  It is a TCP-only implementation.
 * A return value of 0 means splicing has finished, 1 means continue.
 */
int
somove(struct socket *so, int wait)
{
	struct socket *sosp = so->so_sp->ssp_socket;
	struct mbuf *m, **mp, *nextrecord;
	u_long len, off, oobmark;
	long space;
	int error = 0, maxreached = 0, unsplice = 0;
	unsigned int rcvstate;

	sbassertlocked(&so->so_rcv);

	if (so->so_proto->pr_flags & PR_WANTRCVD)
		sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR);

	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);

nextpkt:
	if ((error = READ_ONCE(so->so_error)))
		goto release;
	if (sosp->so_snd.sb_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto release;
	}

	error = READ_ONCE(sosp->so_error);
	if (error) {
		if (error != ETIMEDOUT && error != EFBIG && error != ELOOP)
			goto release;
		error = 0;
	}
	if ((sosp->so_state & SS_ISCONNECTED) == 0)
		goto release;

	/* Calculate how many bytes can be copied now. */
	len = so->so_rcv.sb_datacc;
	if (so->so_splicemax) {
		KASSERT(so->so_splicelen < so->so_splicemax);
		if (so->so_splicemax <= so->so_splicelen + len) {
			len = so->so_splicemax - so->so_splicelen;
			maxreached = 1;
		}
	}
	space = sbspace_locked(sosp, &sosp->so_snd);
	if (so->so_oobmark && so->so_oobmark < len &&
	    so->so_oobmark < space + 1024)
		space += 1024;
	if (space <= 0) {
		maxreached = 0;
		goto release;
	}
	if (space < len) {
		maxreached = 0;
		if (space < sosp->so_snd.sb_lowat)
			goto release;
		len = space;
	}
	sosp->so_snd.sb_state |= SS_ISSENDING;

	SBLASTRECORDCHK(&so->so_rcv, "somove 1");
	SBLASTMBUFCHK(&so->so_rcv, "somove 1");
	m = so->so_rcv.sb_mb;
	if (m == NULL)
		goto release;
	nextrecord = m->m_nextpkt;

	/* Drop address and control information not used with splicing. */
	if (so->so_proto->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("somove soname: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
#endif
		m = m->m_next;
	}
	while (m && m->m_type == MT_CONTROL)
		m = m->m_next;
	if (m == NULL) {
		sbdroprecord(so, &so->so_rcv);
		if (so->so_proto->pr_flags & PR_WANTRCVD) {
			mtx_leave(&sosp->so_snd.sb_mtx);
			mtx_leave(&so->so_rcv.sb_mtx);
			solock_shared(so);
			pru_rcvd(so);
			sounlock_shared(so);
			mtx_enter(&so->so_rcv.sb_mtx);
			mtx_enter(&sosp->so_snd.sb_mtx);
		}
		goto nextpkt;
	}

	/*
	 * By splicing sockets connected to localhost, userland might create
	 * a loop.  Dissolve the splice with an error if a loop is detected
	 * by the counter.
	 *
	 * If we deal with a looped broadcast/multicast packet, we bail out
	 * with no error to suppress splice termination.
	 */
	if ((m->m_flags & M_PKTHDR) &&
	    ((m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) ||
	    ((m->m_flags & M_LOOP) && (m->m_flags & (M_BCAST|M_MCAST))))) {
		error = ELOOP;
		goto release;
	}

	if (so->so_proto->pr_flags & PR_ATOMIC) {
		if ((m->m_flags & M_PKTHDR) == 0)
			panic("somove !PKTHDR: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, m, m->m_type);
		if (sosp->so_snd.sb_hiwat < m->m_pkthdr.len) {
			error = EMSGSIZE;
			goto release;
		}
		if (len < m->m_pkthdr.len)
			goto release;
		if (m->m_pkthdr.len < len) {
			maxreached = 0;
			len = m->m_pkthdr.len;
		}
		/*
		 * Throw away the name mbuf after it has been assured
		 * that the whole first record can be processed.
		 */
		m = so->so_rcv.sb_mb;
		sbfree(so, &so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		sbsync(&so->so_rcv, nextrecord);
	}
	/*
	 * Throw away the control mbufs after it has been assured
	 * that the whole first record can be processed.
	 */
	m = so->so_rcv.sb_mb;
	while (m && m->m_type == MT_CONTROL) {
		sbfree(so, &so->so_rcv, m);
		so->so_rcv.sb_mb = m_free(m);
		m = so->so_rcv.sb_mb;
		sbsync(&so->so_rcv, nextrecord);
	}

	SBLASTRECORDCHK(&so->so_rcv, "somove 2");
	SBLASTMBUFCHK(&so->so_rcv, "somove 2");

	/* Take at most len mbufs out of receive buffer. */
	for (off = 0, mp = &m; off <= len && *mp;
	    off += (*mp)->m_len, mp = &(*mp)->m_next) {
		u_long size = len - off;

#ifdef DIAGNOSTIC
		if ((*mp)->m_type != MT_DATA && (*mp)->m_type != MT_HEADER)
			panic("somove type: so %p, so_type %d, m %p, "
			    "m_type %d", so, so->so_type, *mp, (*mp)->m_type);
#endif
		if ((*mp)->m_len > size) {
			/*
			 * Move only a partial mbuf at maximum splice length or
			 * if the drain buffer is too small for this large mbuf.
			 */
			if (!maxreached && sosp->so_snd.sb_datacc > 0) {
				len -= size;
				break;
			}
			*mp = m_copym(so->so_rcv.sb_mb, 0, size, wait);
			if (*mp == NULL) {
				len -= size;
				break;
			}
			so->so_rcv.sb_mb->m_data += size;
			so->so_rcv.sb_mb->m_len -= size;
			so->so_rcv.sb_cc -= size;
			so->so_rcv.sb_datacc -= size;
		} else {
			*mp = so->so_rcv.sb_mb;
			sbfree(so, &so->so_rcv, *mp);
			so->so_rcv.sb_mb = (*mp)->m_next;
			sbsync(&so->so_rcv, nextrecord);
		}
	}
	*mp = NULL;

	SBLASTRECORDCHK(&so->so_rcv, "somove 3");
	SBLASTMBUFCHK(&so->so_rcv, "somove 3");
	SBCHECK(so, &so->so_rcv);
	if (m == NULL)
		goto release;
	m->m_nextpkt = NULL;
	if (m->m_flags & M_PKTHDR) {
		m_resethdr(m);
		m->m_pkthdr.len = len;
	}

	/* Send window update to source peer as receive buffer has changed. */
	if (so->so_proto->pr_flags & PR_WANTRCVD) {
		mtx_leave(&sosp->so_snd.sb_mtx);
		mtx_leave(&so->so_rcv.sb_mtx);
		solock_shared(so);
		pru_rcvd(so);
		sounlock_shared(so);
		mtx_enter(&so->so_rcv.sb_mtx);
		mtx_enter(&sosp->so_snd.sb_mtx);
	}

	/* Receive buffer did shrink by len bytes, adjust oob. */
	rcvstate = so->so_rcv.sb_state;
	so->so_rcv.sb_state &= ~SS_RCVATMARK;
	oobmark = so->so_oobmark;
	so->so_oobmark = oobmark > len ? oobmark - len : 0;
	if (oobmark) {
		if (oobmark == len)
			so->so_rcv.sb_state |= SS_RCVATMARK;
		if (oobmark >= len)
			oobmark = 0;
	}

	/*
	 * Handle oob data.  If any malloc fails, ignore error.
	 * TCP urgent data is not very reliable anyway.
	 */
	while (((rcvstate & SS_RCVATMARK) || oobmark) &&
	    (so->so_options & SO_OOBINLINE)) {
		struct mbuf *o = NULL;

		if (rcvstate & SS_RCVATMARK) {
			o = m_get(wait, MT_DATA);
			rcvstate &= ~SS_RCVATMARK;
		} else if (oobmark) {
			o = m_split(m, oobmark, wait);
			if (o) {
				mtx_leave(&sosp->so_snd.sb_mtx);
				mtx_leave(&so->so_rcv.sb_mtx);
				solock_shared(sosp);
				error = pru_send(sosp, m, NULL, NULL);
				sounlock_shared(sosp);
				mtx_enter(&so->so_rcv.sb_mtx);
				mtx_enter(&sosp->so_snd.sb_mtx);

				if (error) {
					if (sosp->so_snd.sb_state &
					    SS_CANTSENDMORE)
						error = EPIPE;
					m_freem(o);
					goto release;
				}
				len -= oobmark;
				so->so_splicelen += oobmark;
				m = o;
				o = m_get(wait, MT_DATA);
			}
			oobmark = 0;
		}
		if (o) {
			o->m_len = 1;
			*mtod(o, caddr_t) = *mtod(m, caddr_t);

			mtx_leave(&sosp->so_snd.sb_mtx);
			mtx_leave(&so->so_rcv.sb_mtx);
			solock_shared(sosp);
			error = pru_sendoob(sosp, o, NULL, NULL);
			sounlock_shared(sosp);
			mtx_enter(&so->so_rcv.sb_mtx);
			mtx_enter(&sosp->so_snd.sb_mtx);

			if (error) {
				if (sosp->so_snd.sb_state & SS_CANTSENDMORE)
					error = EPIPE;
				m_freem(m);
				goto release;
			}
			len -= 1;
			so->so_splicelen += 1;
			if (oobmark) {
				oobmark -= 1;
				if (oobmark == 0)
					rcvstate |= SS_RCVATMARK;
			}
			m_adj(m, 1);
		}
	}

	/* Append all remaining data to drain socket. */
	if (so->so_rcv.sb_cc == 0 || maxreached)
		sosp->so_snd.sb_state &= ~SS_ISSENDING;

	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);
	solock_shared(sosp);
	error = pru_send(sosp, m, NULL, NULL);
	sounlock_shared(sosp);
	mtx_enter(&so->so_rcv.sb_mtx);
	mtx_enter(&sosp->so_snd.sb_mtx);

	if (error) {
		if (sosp->so_snd.sb_state & SS_CANTSENDMORE ||
		    sosp->so_pcb == NULL)
			error = EPIPE;
		goto release;
	}
	so->so_splicelen += len;

	/* Move several packets if possible. */
	if (!maxreached && nextrecord)
		goto nextpkt;

 release:
	sosp->so_snd.sb_state &= ~SS_ISSENDING;

	if (!error && maxreached && so->so_splicemax == so->so_splicelen)
		error = EFBIG;
	if (error)
		WRITE_ONCE(so->so_error, error);

	if (((so->so_rcv.sb_state & SS_CANTRCVMORE) &&
	    so->so_rcv.sb_cc == 0) ||
	    (sosp->so_snd.sb_state & SS_CANTSENDMORE) ||
	    maxreached || error)
		unsplice = 1;

	mtx_leave(&sosp->so_snd.sb_mtx);
	mtx_leave(&so->so_rcv.sb_mtx);

	if (so->so_proto->pr_flags & PR_WANTRCVD)
		sbunlock(&so->so_snd);

	if (unsplice) {
		soref(sosp);
		sounsplice(so, sosp, 0);
		sorele(sosp);

		return (0);
	}
	if (timerisset(&so->so_idletv))
		timeout_add_tv(&so->so_idleto, &so->so_idletv);
	return (1);
}
#endif /* SOCKET_SPLICE */

void
sorwakeup(struct socket *so)
{
	if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
	if (so->so_proto->pr_flags & PR_SPLICE) {
		sb_mtx_lock(&so->so_rcv);
		if (so->so_rcv.sb_flags & SB_SPLICE)
			task_add(sosplice_taskq, &so->so_splicetask);
		if (isspliced(so)) {
			sb_mtx_unlock(&so->so_rcv);
			return;
		}
		sb_mtx_unlock(&so->so_rcv);
	}
#endif
	sowakeup(so, &so->so_rcv);
	if (so->so_upcall)
		(*(so->so_upcall))(so, so->so_upcallarg, M_DONTWAIT);
}

void
sowwakeup(struct socket *so)
{
	if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
	if (so->so_proto->pr_flags & PR_SPLICE) {
		sb_mtx_lock(&so->so_snd);
		if (so->so_snd.sb_flags & SB_SPLICE)
			task_add(sosplice_taskq,
			    &so->so_sp->ssp_soback->so_splicetask);
		if (issplicedback(so)) {
			sb_mtx_unlock(&so->so_snd);
			return;
		}
		sb_mtx_unlock(&so->so_snd);
	}
#endif
	sowakeup(so, &so->so_snd);
}

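/*
 * sosetopt() and sogetopt() below handle the SOL_SOCKET level of
 * setsockopt(2) and getsockopt(2); any other level is passed through
 * to the protocol's pr_ctloutput handler.
 */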
int
sosetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			solock(so);
			error = (*so->so_proto->pr_ctloutput)(PRCO_SETOPT, so,
			    level, optname, m);
			sounlock(so);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger) ||
			    mtod(m, struct linger *)->l_linger < 0 ||
			    mtod(m, struct linger *)->l_linger > SHRT_MAX)
				return (EINVAL);

			solock(so);
			so->so_linger = mtod(m, struct linger *)->l_linger;
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_BINDANY:
			if ((error = suser(curproc)) != 0)	/* XXX */
				return (error);
			/* FALLTHROUGH */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);

			solock(so);
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			sounlock(so);

			break;
		case SO_DONTROUTE:
			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			if (*mtod(m, int *))
				error = EOPNOTSUPP;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			struct sockbuf *sb = (optname == SO_SNDBUF ||
			    optname == SO_SNDLOWAT ?
			    &so->so_snd : &so->so_rcv);
			u_long cnt;

			if (m == NULL || m->m_len < sizeof (int))
				return (EINVAL);
			cnt = *mtod(m, int *);
			if ((long)cnt <= 0)
				cnt = 1;

			if (((sb->sb_flags & SB_MTXLOCK) == 0))
				solock(so);
			mtx_enter(&sb->sb_mtx);

			switch (optname) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sb->sb_state &
				    (SS_CANTSENDMORE | SS_CANTRCVMORE)) {
					error = EINVAL;
					break;
				}
				if (sbcheckreserve(cnt, sb->sb_wat) ||
				    sbreserve(so, sb, cnt)) {
					error = ENOBUFS;
					break;
				}
				sb->sb_wat = cnt;
				break;
			case SO_SNDLOWAT:
			case SO_RCVLOWAT:
				sb->sb_lowat = (cnt > sb->sb_hiwat) ?
				    sb->sb_hiwat : cnt;
				break;
			}

			mtx_leave(&sb->sb_mtx);
			if (((sb->sb_flags & SB_MTXLOCK) == 0))
				sounlock(so);

			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct sockbuf *sb = (optname == SO_SNDTIMEO ?
			    &so->so_snd : &so->so_rcv);
			struct timeval tv;
			uint64_t nsecs;

			if (m == NULL || m->m_len < sizeof (tv))
				return (EINVAL);
			memcpy(&tv, mtod(m, struct timeval *), sizeof tv);
			if (!timerisvalid(&tv))
				return (EINVAL);
			nsecs = TIMEVAL_TO_NSEC(&tv);
			if (nsecs == UINT64_MAX)
				return (EDOM);
			if (nsecs == 0)
				nsecs = INFSLP;

			mtx_enter(&sb->sb_mtx);
			sb->sb_timeo_nsecs = nsecs;
			mtx_leave(&sb->sb_mtx);
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				const struct domain *dom =
				    so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				solock(so);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_SETOPT, so, level, optname, m);
				sounlock(so);
			} else
				error = ENOPROTOOPT;
			break;
#ifdef SOCKET_SPLICE
		case SO_SPLICE:
			if (m == NULL) {
				error = sosplice(so, -1, 0, NULL);
			} else if (m->m_len < sizeof(int)) {
				error = EINVAL;
			} else if (m->m_len < sizeof(struct splice)) {
				error = sosplice(so, *mtod(m, int *), 0, NULL);
			} else {
				error = sosplice(so,
				    mtod(m, struct splice *)->sp_fd,
				    mtod(m, struct splice *)->sp_max,
				    &mtod(m, struct splice *)->sp_idle);
			}
			break;
#endif /* SOCKET_SPLICE */

		default:
			error = ENOPROTOOPT;
			break;
		}
	}

	return (error);
}

int
sogetopt(struct socket *so, int level, int optname, struct mbuf *m)
{
	int error = 0;

	if (level != SOL_SOCKET) {
		if (so->so_proto->pr_ctloutput) {
			m->m_len = 0;

			solock(so);
			error = (*so->so_proto->pr_ctloutput)(PRCO_GETOPT, so,
			    level, optname, m);
			sounlock(so);
			return (error);
		} else
			return (ENOPROTOOPT);
	} else {
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			solock_shared(so);
			mtod(m, struct linger *)->l_onoff =
			    so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			sounlock_shared(so);
			break;

		case SO_BINDANY:
		case SO_USELOOPBACK:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_ACCEPTCONN:
		case SO_TIMESTAMP:
		case SO_ZEROIZE:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_DONTROUTE:
			*mtod(m, int *) = 0;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			solock(so);
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			sounlock(so);

			break;

		case SO_DOMAIN:
			*mtod(m, int *) = so->so_proto->pr_domain->dom_family;
			break;

		case SO_PROTOCOL:
			*mtod(m, int *) = so->so_proto->pr_protocol;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct sockbuf *sb = (optname == SO_SNDTIMEO ?
			    &so->so_snd : &so->so_rcv);
			struct timeval tv;
			uint64_t nsecs;

			mtx_enter(&sb->sb_mtx);
			nsecs = sb->sb_timeo_nsecs;
			mtx_leave(&sb->sb_mtx);

			m->m_len = sizeof(struct timeval);
			memset(&tv, 0, sizeof(tv));
			if (nsecs != INFSLP)
				NSEC_TO_TIMEVAL(nsecs, &tv);
			memcpy(mtod(m, struct timeval *), &tv, sizeof tv);
			break;
		    }

		case SO_RTABLE:
			if (so->so_proto->pr_domain &&
			    so->so_proto->pr_domain->dom_protosw &&
			    so->so_proto->pr_ctloutput) {
				const struct domain *dom =
				    so->so_proto->pr_domain;

				level = dom->dom_protosw->pr_protocol;
				solock(so);
				error = (*so->so_proto->pr_ctloutput)
				    (PRCO_GETOPT, so, level, optname, m);
				sounlock(so);
				if (error)
					return (error);
				break;
			}
			return (ENOPROTOOPT);

#ifdef SOCKET_SPLICE
		case SO_SPLICE:
		    {
			off_t len;

			m->m_len = sizeof(off_t);
			solock_shared(so);
			len = so->so_sp ? so->so_sp->ssp_len : 0;
			sounlock_shared(so);
			memcpy(mtod(m, off_t *), &len, sizeof(off_t));
			break;
		    }
#endif /* SOCKET_SPLICE */

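		/*
		 * SO_PEERCRED returns the credentials the AF_UNIX peer had
		 * when the sockets were connected (unp_connid), or ENOTCONN
		 * if none were recorded.  Illustrative userland use, assuming
		 * a connected AF_UNIX descriptor `s':
		 *
		 *	#include <sys/socket.h>
		 *	#include <stdio.h>
		 *
		 *	struct sockpeercred cred;
		 *	socklen_t len = sizeof(cred);
		 *	if (getsockopt(s, SOL_SOCKET, SO_PEERCRED,
		 *	    &cred, &len) == 0)
		 *		printf("peer uid %u\n", cred.uid);
		 */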
		case SO_PEERCRED:
			if (so->so_proto->pr_protocol == AF_UNIX) {
				struct unpcb *unp = sotounpcb(so);

				solock(so);
				if (unp->unp_flags & UNP_FEIDS) {
					m->m_len = sizeof(unp->unp_connid);
					memcpy(mtod(m, caddr_t),
					    &(unp->unp_connid), m->m_len);
					sounlock(so);
					break;
				}
				sounlock(so);

				return (ENOTCONN);
			}
			return (EOPNOTSUPP);

		default:
			return (ENOPROTOOPT);
		}
		return (0);
	}
}

/*
 * Out-of-band data has arrived: notify the owning process or process
 * group with SIGURG and wake any read-side kqueue filters.
 */
void
sohasoutofband(struct socket *so)
{
	pgsigio(&so->so_sigio, SIGURG, 0);
	knote(&so->so_rcv.sb_klist, 0);
}

/*
 * Take the locks a socket kqueue filter needs: the shared net lock for
 * inet sockets, the per-socket rwlock for everything else, and then
 * the socket buffer mutex.  sofilt_unlock() releases them in reverse
 * order.
 */
void
sofilt_lock(struct socket *so, struct sockbuf *sb)
{
	switch (so->so_proto->pr_domain->dom_family) {
	case PF_INET:
	case PF_INET6:
		NET_LOCK_SHARED();
		break;
	default:
		rw_enter_write(&so->so_lock);
		break;
	}

	mtx_enter(&sb->sb_mtx);
}

void
sofilt_unlock(struct socket *so, struct sockbuf *sb)
{
	mtx_leave(&sb->sb_mtx);

	switch (so->so_proto->pr_domain->dom_family) {
	case PF_INET:
	case PF_INET6:
		NET_UNLOCK_SHARED();
		break;
	default:
		rw_exit_write(&so->so_lock);
		break;
	}
}

/*
 * Attach a knote to the socket buffer that matches the requested
 * filter: EVFILT_READ and EVFILT_EXCEPT watch the receive buffer,
 * EVFILT_WRITE the send buffer.
 */
int
soo_kqfilter(struct file *fp, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	struct sockbuf *sb;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &soread_filtops;
		sb = &so->so_rcv;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &sowrite_filtops;
		sb = &so->so_snd;
		break;
	case EVFILT_EXCEPT:
		kn->kn_fop = &soexcept_filtops;
		sb = &so->so_rcv;
		break;
	default:
		return (EINVAL);
	}

	klist_insert(&sb->sb_klist, kn);

	return (0);
}

void
filt_sordetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	klist_remove(&so->so_rcv.sb_klist, kn);
}

int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	u_int state = READ_ONCE(so->so_state);
	u_int error = READ_ONCE(so->so_error);
	int rv = 0;

	MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
	if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

	if (so->so_options & SO_ACCEPTCONN) {
		short qlen = READ_ONCE(so->so_qlen);

		if (so->so_rcv.sb_flags & SB_MTXLOCK)
			soassertlocked_readonly(so);

		kn->kn_data = qlen;
		rv = (kn->kn_data != 0);

		if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) {
			if (state & SS_ISDISCONNECTED) {
				kn->kn_flags |= __EV_HUP;
				rv = 1;
			} else {
				rv = qlen || soreadable(so);
			}
		}

		return rv;
	}

	kn->kn_data = so->so_rcv.sb_cc;
#ifdef SOCKET_SPLICE
	if (isspliced(so)) {
		rv = 0;
	} else
#endif /* SOCKET_SPLICE */
	if (so->so_rcv.sb_state & SS_CANTRCVMORE) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL) {
			if (state & SS_ISDISCONNECTED)
				kn->kn_flags |= __EV_HUP;
		}
		kn->kn_fflags = error;
		rv = 1;
	} else if (error) {
		rv = 1;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_rcv.sb_lowat);
	}

	return rv;
}

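/*
 * filt_soread() above backs EVFILT_READ.  A sketch of the userland
 * side that exercises the NOTE_LOWAT branch (illustrative only; `s' is
 * an arbitrary socket descriptor):
 *
 *	#include <sys/event.h>
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, s, EVFILT_READ, EV_ADD, NOTE_LOWAT, 512, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *
 * The knote then fires once at least 512 bytes are buffered, instead
 * of the default so_rcv.sb_lowat threshold.
 */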
void
filt_sowdetach(struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;

	klist_remove(&so->so_snd.sb_klist, kn);
}

int
filt_sowrite(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	u_int state = READ_ONCE(so->so_state);
	u_int error = READ_ONCE(so->so_error);
	int rv;

	MUTEX_ASSERT_LOCKED(&so->so_snd.sb_mtx);
	if ((so->so_snd.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

	kn->kn_data = sbspace_locked(so, &so->so_snd);
	if (so->so_snd.sb_state & SS_CANTSENDMORE) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL) {
			if (state & SS_ISDISCONNECTED)
				kn->kn_flags |= __EV_HUP;
		}
		kn->kn_fflags = error;
		rv = 1;
	} else if (error) {
		rv = 1;
	} else if (((state & SS_ISCONNECTED) == 0) &&
	    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
		rv = 0;
	} else if (kn->kn_sfflags & NOTE_LOWAT) {
		rv = (kn->kn_data >= kn->kn_sdata);
	} else {
		rv = (kn->kn_data >= so->so_snd.sb_lowat);
	}

	return (rv);
}

int
filt_soexcept(struct knote *kn, long hint)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv = 0;

	MUTEX_ASSERT_LOCKED(&so->so_rcv.sb_mtx);
	if ((so->so_rcv.sb_flags & SB_MTXLOCK) == 0)
		soassertlocked_readonly(so);

#ifdef SOCKET_SPLICE
	if (isspliced(so)) {
		rv = 0;
	} else
#endif /* SOCKET_SPLICE */
	if (kn->kn_sfflags & NOTE_OOB) {
		if (so->so_oobmark || (so->so_rcv.sb_state & SS_RCVATMARK)) {
			kn->kn_fflags |= NOTE_OOB;
			kn->kn_data -= so->so_oobmark;
			rv = 1;
		}
	}

	if (kn->kn_flags & __EV_POLL) {
		u_int state = READ_ONCE(so->so_state);

		if (state & SS_ISDISCONNECTED) {
			kn->kn_flags |= __EV_HUP;
			rv = 1;
		}
	}

	return rv;
}

/*
 * The f_modify and f_process callbacks wrap the generic knote handlers
 * with the socket locking scheme from sofilt_lock().
 */
int
filt_sowmodify(struct kevent *kev, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_snd);
	rv = knote_modify(kev, kn);
	sofilt_unlock(so, &so->so_snd);

	return (rv);
}

int
filt_sowprocess(struct knote *kn, struct kevent *kev)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_snd);
	rv = knote_process(kn, kev);
	sofilt_unlock(so, &so->so_snd);

	return (rv);
}

int
filt_sormodify(struct kevent *kev, struct knote *kn)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_rcv);
	rv = knote_modify(kev, kn);
	sofilt_unlock(so, &so->so_rcv);

	return (rv);
}

int
filt_sorprocess(struct knote *kn, struct kevent *kev)
{
	struct socket *so = kn->kn_fp->f_data;
	int rv;

	sofilt_lock(so, &so->so_rcv);
	rv = knote_process(kn, kev);
	sofilt_unlock(so, &so->so_rcv);

	return (rv);
}

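/*
 * Debugger helpers: sobuf_print() and so_print() dump a sockbuf and a
 * socket for inspection from ddb(4), e.g. via its "show socket"
 * command.
 */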
#ifdef DDB
void	sobuf_print(struct sockbuf *,
	    int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2))));

void
sobuf_print(struct sockbuf *sb,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	(*pr)("\tsb_cc: %lu\n", sb->sb_cc);
	(*pr)("\tsb_datacc: %lu\n", sb->sb_datacc);
	(*pr)("\tsb_hiwat: %lu\n", sb->sb_hiwat);
	(*pr)("\tsb_wat: %lu\n", sb->sb_wat);
	(*pr)("\tsb_mbcnt: %lu\n", sb->sb_mbcnt);
	(*pr)("\tsb_mbmax: %lu\n", sb->sb_mbmax);
	(*pr)("\tsb_lowat: %ld\n", sb->sb_lowat);
	(*pr)("\tsb_mb: %p\n", sb->sb_mb);
	(*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
	(*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
	(*pr)("\tsb_flags: %04x\n", sb->sb_flags);
	(*pr)("\tsb_state: %04x\n", sb->sb_state);
	(*pr)("\tsb_timeo_nsecs: %llu\n", sb->sb_timeo_nsecs);
}

void
so_print(void *v,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct socket *so = v;

	(*pr)("socket %p\n", so);
	(*pr)("so_type: %i\n", so->so_type);
	(*pr)("so_options: 0x%04x\n", so->so_options);	/* %b */
	(*pr)("so_linger: %i\n", so->so_linger);
	(*pr)("so_state: 0x%04x\n", so->so_state);
	(*pr)("so_pcb: %p\n", so->so_pcb);
	(*pr)("so_proto: %p\n", so->so_proto);
	(*pr)("so_sigio: %p\n", so->so_sigio.sir_sigio);

	(*pr)("so_head: %p\n", so->so_head);
	(*pr)("so_onq: %p\n", so->so_onq);
	(*pr)("so_q0: @%p first: %p\n", &so->so_q0, TAILQ_FIRST(&so->so_q0));
	(*pr)("so_q: @%p first: %p\n", &so->so_q, TAILQ_FIRST(&so->so_q));
	(*pr)("so_eq: next: %p\n", TAILQ_NEXT(so, so_qe));
	(*pr)("so_q0len: %i\n", so->so_q0len);
	(*pr)("so_qlen: %i\n", so->so_qlen);
	(*pr)("so_qlimit: %i\n", so->so_qlimit);
	(*pr)("so_timeo: %i\n", so->so_timeo);
	(*pr)("so_oobmark: %lu\n", so->so_oobmark);

	(*pr)("so_sp: %p\n", so->so_sp);
	if (so->so_sp != NULL) {
		(*pr)("\tssp_socket: %p\n", so->so_sp->ssp_socket);
		(*pr)("\tssp_soback: %p\n", so->so_sp->ssp_soback);
		(*pr)("\tssp_len: %lld\n",
		    (unsigned long long)so->so_sp->ssp_len);
		(*pr)("\tssp_max: %lld\n",
		    (unsigned long long)so->so_sp->ssp_max);
		(*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec,
		    so->so_sp->ssp_idletv.tv_usec);
		(*pr)("\tssp_idleto: %spending (@%i)\n",
		    timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ",
		    so->so_sp->ssp_idleto.to_time);
	}

	(*pr)("so_rcv:\n");
	sobuf_print(&so->so_rcv, pr);
	(*pr)("so_snd:\n");
	sobuf_print(&so->so_snd, pr);

	(*pr)("so_upcall: %p so_upcallarg: %p\n",
	    so->so_upcall, so->so_upcallarg);

	(*pr)("so_euid: %d so_ruid: %d\n", so->so_euid, so->so_ruid);
	(*pr)("so_egid: %d so_rgid: %d\n", so->so_egid, so->so_rgid);
	(*pr)("so_cpid: %d\n", so->so_cpid);
}
#endif