/*
 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved.
 * Copyright (c) 2004 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1991, 1993, 1995
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
61 * 62 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 63 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $ 64 */ 65 66 #include "opt_ipsec.h" 67 #include "opt_inet6.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/malloc.h> 72 #include <sys/mbuf.h> 73 #include <sys/domain.h> 74 #include <sys/protosw.h> 75 #include <sys/socket.h> 76 #include <sys/socketvar.h> 77 #include <sys/proc.h> 78 #include <sys/priv.h> 79 #include <sys/jail.h> 80 #include <sys/kernel.h> 81 #include <sys/sysctl.h> 82 83 #include <sys/thread2.h> 84 #include <sys/socketvar2.h> 85 #include <sys/msgport2.h> 86 87 #include <machine/limits.h> 88 89 #include <net/if.h> 90 #include <net/if_types.h> 91 #include <net/route.h> 92 93 #include <netinet/in.h> 94 #include <netinet/in_pcb.h> 95 #include <netinet/in_var.h> 96 #include <netinet/ip_var.h> 97 #ifdef INET6 98 #include <netinet/ip6.h> 99 #include <netinet6/ip6_var.h> 100 #endif /* INET6 */ 101 102 #ifdef IPSEC 103 #include <netinet6/ipsec.h> 104 #include <netproto/key/key.h> 105 #include <netproto/ipsec/esp_var.h> 106 #endif 107 108 #ifdef FAST_IPSEC 109 #if defined(IPSEC) || defined(IPSEC_ESP) 110 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 111 #endif 112 113 #include <netproto/ipsec/ipsec.h> 114 #include <netproto/ipsec/key.h> 115 #define IPSEC 116 #endif /* FAST_IPSEC */ 117 118 #define INP_LOCALGROUP_SIZMIN 8 119 #define INP_LOCALGROUP_SIZMAX 256 120 121 struct in_addr zeroin_addr; 122 123 /* 124 * These configure the range of local port addresses assigned to 125 * "unspecified" outgoing connections/packets/whatever. 
126 */ 127 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 128 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 129 130 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 131 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 132 133 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 134 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 135 136 #define RANGECHK(var, min, max) \ 137 if ((var) < (min)) { (var) = (min); } \ 138 else if ((var) > (max)) { (var) = (max); } 139 140 int udpencap_enable = 1; /* enabled by default */ 141 int udpencap_port = 4500; /* triggers decapsulation */ 142 143 static int 144 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 145 { 146 int error; 147 148 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); 149 if (!error) { 150 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 151 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 152 153 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 154 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 155 156 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 157 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 158 } 159 return (error); 160 } 161 162 #undef RANGECHK 163 164 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 165 166 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 167 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 168 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 169 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 170 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 171 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 172 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 173 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 174 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 175 
&ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 176 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 177 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 178 179 /* 180 * in_pcb.c: manage the Protocol Control Blocks. 181 * 182 * NOTE: It is assumed that most of these functions will be called from 183 * a critical section. XXX - There are, unfortunately, a few exceptions 184 * to this rule that should be fixed. 185 * 186 * NOTE: The caller should initialize the cpu field to the cpu running the 187 * protocol stack associated with this inpcbinfo. 188 */ 189 190 void 191 in_pcbinfo_init(struct inpcbinfo *pcbinfo) 192 { 193 LIST_INIT(&pcbinfo->pcblisthead); 194 pcbinfo->cpu = -1; 195 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), M_PCB, 196 M_WAITOK | M_ZERO); 197 } 198 199 struct baddynamicports baddynamicports; 200 201 /* 202 * Check if the specified port is invalid for dynamic allocation. 203 */ 204 int 205 in_baddynamic(u_int16_t port, u_int16_t proto) 206 { 207 switch (proto) { 208 case IPPROTO_TCP: 209 return (DP_ISSET(baddynamicports.tcp, port)); 210 case IPPROTO_UDP: 211 #ifdef IPSEC 212 /* Cannot preset this as it is a sysctl */ 213 if (port == udpencap_port) 214 return (1); 215 #endif 216 return (DP_ISSET(baddynamicports.udp, port)); 217 default: 218 return (0); 219 } 220 } 221 222 223 /* 224 * Allocate a PCB and associate it with the socket. 
225 */ 226 int 227 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) 228 { 229 struct inpcb *inp; 230 #ifdef IPSEC 231 int error; 232 #endif 233 234 inp = kmalloc(pcbinfo->ipi_size, M_PCB, M_WAITOK|M_ZERO|M_NULLOK); 235 if (inp == NULL) 236 return (ENOMEM); 237 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 238 inp->inp_pcbinfo = inp->inp_cpcbinfo = pcbinfo; 239 inp->inp_socket = so; 240 #ifdef IPSEC 241 error = ipsec_init_policy(so, &inp->inp_sp); 242 if (error != 0) { 243 kfree(inp, M_PCB); 244 return (error); 245 } 246 #endif 247 #ifdef INET6 248 if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only) 249 inp->inp_flags |= IN6P_IPV6_V6ONLY; 250 if (ip6_auto_flowlabel) 251 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 252 #endif 253 soreference(so); 254 so->so_pcb = inp; 255 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 256 pcbinfo->ipi_count++; 257 return (0); 258 } 259 260 /* 261 * Unlink a pcb with the intention of moving it to another cpu with a 262 * different pcbinfo. While unlinked nothing should attempt to dereference 263 * inp_pcbinfo, NULL it out so we assert if it does. 264 */ 265 void 266 in_pcbunlink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 267 { 268 KASSERT(inp->inp_pcbinfo == pcbinfo, ("pcbinfo mismatch")); 269 KASSERT(inp->inp_cpcbinfo == pcbinfo, ("cpcbinfo mismatch")); 270 KASSERT((inp->inp_flags & (INP_WILDCARD | INP_CONNECTED)) == 0, 271 ("already linked")); 272 273 LIST_REMOVE(inp, inp_list); 274 pcbinfo->ipi_count--; 275 inp->inp_pcbinfo = NULL; 276 inp->inp_cpcbinfo = NULL; 277 } 278 279 /* 280 * Relink a pcb into a new pcbinfo. 
281 */ 282 void 283 in_pcblink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 284 { 285 KASSERT(inp->inp_pcbinfo == NULL, ("has pcbinfo")); 286 KASSERT(inp->inp_cpcbinfo == NULL, ("has cpcbinfo")); 287 KASSERT((inp->inp_flags & (INP_WILDCARD | INP_CONNECTED)) == 0, 288 ("already linked")); 289 290 inp->inp_cpcbinfo = pcbinfo; 291 inp->inp_pcbinfo = pcbinfo; 292 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 293 pcbinfo->ipi_count++; 294 } 295 296 int 297 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 298 { 299 struct socket *so = inp->inp_socket; 300 unsigned short *lastport; 301 struct sockaddr_in *sin; 302 struct sockaddr_in jsin; 303 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 304 struct ucred *cred = NULL; 305 u_short lport = 0; 306 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 307 int error; 308 309 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */ 310 return (EADDRNOTAVAIL); 311 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 312 return (EINVAL); /* already bound */ 313 314 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) 315 wild = 1; /* neither SO_REUSEADDR nor SO_REUSEPORT is set */ 316 if (td->td_proc) 317 cred = td->td_proc->p_ucred; 318 319 /* 320 * This has to be atomic. If the porthash is shared across multiple 321 * protocol threads (aka tcp) then the token will be non-NULL. 322 */ 323 if (pcbinfo->porttoken) 324 lwkt_gettoken(pcbinfo->porttoken); 325 326 if (nam != NULL) { 327 sin = (struct sockaddr_in *)nam; 328 if (nam->sa_len != sizeof *sin) { 329 error = EINVAL; 330 goto done; 331 } 332 #ifdef notdef 333 /* 334 * We should check the family, but old programs 335 * incorrectly fail to initialize it. 
336 */ 337 if (sin->sin_family != AF_INET) { 338 error = EAFNOSUPPORT; 339 goto done; 340 } 341 #endif 342 if (!prison_replace_wildcards(td, nam)) { 343 error = EINVAL; 344 goto done; 345 } 346 lport = sin->sin_port; 347 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 348 /* 349 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 350 * allow complete duplication of binding if 351 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 352 * and a multicast address is bound on both 353 * new and duplicated sockets. 354 */ 355 if (so->so_options & SO_REUSEADDR) 356 reuseport = SO_REUSEADDR | SO_REUSEPORT; 357 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 358 sin->sin_port = 0; /* yech... */ 359 bzero(&sin->sin_zero, sizeof sin->sin_zero); 360 if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL) { 361 error = EADDRNOTAVAIL; 362 goto done; 363 } 364 } 365 if (lport != 0) { 366 struct inpcb *t; 367 368 /* GROSS */ 369 if (ntohs(lport) < IPPORT_RESERVED && 370 cred && 371 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) { 372 error = EACCES; 373 goto done; 374 } 375 if (so->so_cred->cr_uid != 0 && 376 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 377 t = in_pcblookup_local(pcbinfo, 378 sin->sin_addr, 379 lport, 380 INPLOOKUP_WILDCARD, 381 cred); 382 if (t && 383 (!in_nullhost(sin->sin_addr) || 384 !in_nullhost(t->inp_laddr) || 385 (t->inp_socket->so_options & 386 SO_REUSEPORT) == 0) && 387 (so->so_cred->cr_uid != 388 t->inp_socket->so_cred->cr_uid)) { 389 #ifdef INET6 390 if (!in_nullhost(sin->sin_addr) || 391 !in_nullhost(t->inp_laddr) || 392 INP_SOCKAF(so) == 393 INP_SOCKAF(t->inp_socket)) 394 #endif 395 { 396 error = EADDRINUSE; 397 goto done; 398 } 399 } 400 } 401 if (cred && !prison_replace_wildcards(td, nam)) { 402 error = EADDRNOTAVAIL; 403 goto done; 404 } 405 t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, 406 wild, cred); 407 if (t && !(reuseport & t->inp_socket->so_options)) { 408 #ifdef INET6 409 if (!in_nullhost(sin->sin_addr) || 410 
!in_nullhost(t->inp_laddr) || 411 INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) 412 #endif 413 { 414 error = EADDRINUSE; 415 goto done; 416 } 417 } 418 } 419 inp->inp_laddr = sin->sin_addr; 420 } 421 if (lport == 0) { 422 ushort first, last; 423 int count; 424 425 jsin.sin_family = AF_INET; 426 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 427 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 428 inp->inp_laddr.s_addr = INADDR_ANY; 429 error = EINVAL; 430 goto done; 431 } 432 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 433 434 inp->inp_flags |= INP_ANONPORT; 435 436 if (inp->inp_flags & INP_HIGHPORT) { 437 first = ipport_hifirstauto; /* sysctl */ 438 last = ipport_hilastauto; 439 lastport = &pcbinfo->lasthi; 440 } else if (inp->inp_flags & INP_LOWPORT) { 441 if (cred && 442 (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 443 inp->inp_laddr.s_addr = INADDR_ANY; 444 goto done; 445 } 446 first = ipport_lowfirstauto; /* 1023 */ 447 last = ipport_lowlastauto; /* 600 */ 448 lastport = &pcbinfo->lastlow; 449 } else { 450 first = ipport_firstauto; /* sysctl */ 451 last = ipport_lastauto; 452 lastport = &pcbinfo->lastport; 453 } 454 /* 455 * Simple check to ensure all ports are not used up causing 456 * a deadlock here. 457 * 458 * We split the two cases (up and down) so that the direction 459 * is not being tested on each round of the loop. 460 */ 461 if (first > last) { 462 /* 463 * counting down 464 */ 465 count = first - last; 466 467 do { 468 if (count-- < 0) { /* completely used? */ 469 inp->inp_laddr.s_addr = INADDR_ANY; 470 error = EADDRNOTAVAIL; 471 goto done; 472 } 473 --*lastport; 474 if (*lastport > first || *lastport < last) 475 *lastport = first; 476 lport = htons(*lastport); 477 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 478 lport, wild, cred)); 479 } else { 480 /* 481 * counting up 482 */ 483 count = last - first; 484 485 do { 486 if (count-- < 0) { /* completely used? 
*/ 487 inp->inp_laddr.s_addr = INADDR_ANY; 488 error = EADDRNOTAVAIL; 489 goto done; 490 } 491 ++*lastport; 492 if (*lastport < first || *lastport > last) 493 *lastport = first; 494 lport = htons(*lastport); 495 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 496 lport, wild, cred)); 497 } 498 } 499 inp->inp_lport = lport; 500 501 jsin.sin_family = AF_INET; 502 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 503 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) { 504 inp->inp_laddr.s_addr = INADDR_ANY; 505 inp->inp_lport = 0; 506 error = EINVAL; 507 goto done; 508 } 509 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 510 511 if (in_pcbinsporthash(inp) != 0) { 512 inp->inp_laddr.s_addr = INADDR_ANY; 513 inp->inp_lport = 0; 514 error = EAGAIN; 515 goto done; 516 } 517 error = 0; 518 done: 519 if (pcbinfo->porttoken) 520 lwkt_reltoken(pcbinfo->porttoken); 521 return error; 522 } 523 524 static struct inpcb * 525 in_pcblookup_addrport(struct inpcbinfo *pcbinfo, struct in_addr laddr, 526 u_short lport, struct in_addr faddr, u_short fport, struct ucred *cred) 527 { 528 struct inpcb *inp; 529 struct inpcbporthead *porthash; 530 struct inpcbport *phd; 531 struct inpcb *match = NULL; 532 533 /* 534 * If the porthashbase is shared across several cpus we need 535 * to lock. 536 */ 537 if (pcbinfo->porttoken) 538 lwkt_gettoken(pcbinfo->porttoken); 539 540 /* 541 * Best fit PCB lookup. 542 * 543 * First see if this local port is in use by looking on the 544 * port hash list. 
545 */ 546 porthash = &pcbinfo->porthashbase[ 547 INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; 548 LIST_FOREACH(phd, porthash, phd_hash) { 549 if (phd->phd_port == lport) 550 break; 551 } 552 if (phd != NULL) { 553 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 554 #ifdef INET6 555 if ((inp->inp_vflag & INP_IPV4) == 0) 556 continue; 557 #endif 558 if (inp->inp_laddr.s_addr != INADDR_ANY && 559 inp->inp_laddr.s_addr != laddr.s_addr) 560 continue; 561 562 if (inp->inp_faddr.s_addr != INADDR_ANY && 563 inp->inp_faddr.s_addr != faddr.s_addr) 564 continue; 565 566 if (inp->inp_fport != 0 && inp->inp_fport != fport) 567 continue; 568 569 if (cred == NULL || 570 cred->cr_prison == 571 inp->inp_socket->so_cred->cr_prison) { 572 match = inp; 573 break; 574 } 575 } 576 } 577 if (pcbinfo->porttoken) 578 lwkt_reltoken(pcbinfo->porttoken); 579 return (match); 580 } 581 582 int 583 in_pcbconn_bind(struct inpcb *inp, const struct sockaddr *nam, 584 struct thread *td) 585 { 586 struct proc *p = td->td_proc; 587 unsigned short *lastport; 588 const struct sockaddr_in *sin = (const struct sockaddr_in *)nam; 589 struct sockaddr_in jsin; 590 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 591 struct ucred *cred = NULL; 592 u_short lport = 0; 593 ushort first, last; 594 int count, error, dup = 0; 595 596 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */ 597 return (EADDRNOTAVAIL); 598 599 KKASSERT(inp->inp_laddr.s_addr != INADDR_ANY); 600 if (inp->inp_lport != 0) 601 return (EINVAL); /* already bound */ 602 603 KKASSERT(p); 604 cred = p->p_ucred; 605 606 /* 607 * This has to be atomic. If the porthash is shared across multiple 608 * protocol threads (aka tcp) then the token will be non-NULL. 
609 */ 610 if (pcbinfo->porttoken) 611 lwkt_gettoken(pcbinfo->porttoken); 612 613 jsin.sin_family = AF_INET; 614 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 615 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 616 inp->inp_laddr.s_addr = INADDR_ANY; 617 error = EINVAL; 618 goto done; 619 } 620 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 621 622 inp->inp_flags |= INP_ANONPORT; 623 624 if (inp->inp_flags & INP_HIGHPORT) { 625 first = ipport_hifirstauto; /* sysctl */ 626 last = ipport_hilastauto; 627 lastport = &pcbinfo->lasthi; 628 } else if (inp->inp_flags & INP_LOWPORT) { 629 if (cred && 630 (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 631 inp->inp_laddr.s_addr = INADDR_ANY; 632 goto done; 633 } 634 first = ipport_lowfirstauto; /* 1023 */ 635 last = ipport_lowlastauto; /* 600 */ 636 lastport = &pcbinfo->lastlow; 637 } else { 638 first = ipport_firstauto; /* sysctl */ 639 last = ipport_lastauto; 640 lastport = &pcbinfo->lastport; 641 } 642 643 again: 644 /* 645 * Simple check to ensure all ports are not used up causing 646 * a deadlock here. 647 * 648 * We split the two cases (up and down) so that the direction 649 * is not being tested on each round of the loop. 650 */ 651 if (first > last) { 652 /* 653 * counting down 654 */ 655 count = first - last; 656 657 do { 658 if (count-- < 0) { /* completely used? */ 659 inp->inp_laddr.s_addr = INADDR_ANY; 660 error = EADDRNOTAVAIL; 661 goto done; 662 } 663 --*lastport; 664 if (*lastport > first || *lastport < last) 665 *lastport = first; 666 lport = htons(*lastport); 667 } while (in_pcblookup_addrport(pcbinfo, inp->inp_laddr, lport, 668 sin->sin_addr, sin->sin_port, cred)); 669 } else { 670 /* 671 * counting up 672 */ 673 count = last - first; 674 675 do { 676 if (count-- < 0) { /* completely used? 
*/ 677 inp->inp_laddr.s_addr = INADDR_ANY; 678 error = EADDRNOTAVAIL; 679 goto done; 680 } 681 ++*lastport; 682 if (*lastport < first || *lastport > last) 683 *lastport = first; 684 lport = htons(*lastport); 685 } while (in_pcblookup_addrport(pcbinfo, inp->inp_laddr, lport, 686 sin->sin_addr, sin->sin_port, cred)); 687 } 688 689 /* This could happen on loopback interface */ 690 if (sin->sin_port == lport && 691 sin->sin_addr.s_addr == inp->inp_laddr.s_addr) { 692 if (dup) { 693 /* 694 * Duplicate again; give up 695 */ 696 inp->inp_laddr.s_addr = INADDR_ANY; 697 error = EADDRNOTAVAIL; 698 goto done; 699 } 700 dup = 1; 701 goto again; 702 } 703 inp->inp_lport = lport; 704 705 jsin.sin_family = AF_INET; 706 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 707 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) { 708 inp->inp_laddr.s_addr = INADDR_ANY; 709 inp->inp_lport = 0; 710 error = EINVAL; 711 goto done; 712 } 713 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 714 715 if (in_pcbinsporthash(inp) != 0) { 716 inp->inp_laddr.s_addr = INADDR_ANY; 717 inp->inp_lport = 0; 718 error = EAGAIN; 719 goto done; 720 } 721 error = 0; 722 done: 723 if (pcbinfo->porttoken) 724 lwkt_reltoken(pcbinfo->porttoken); 725 return error; 726 } 727 728 /* 729 * Transform old in_pcbconnect() into an inner subroutine for new 730 * in_pcbconnect(): Do some validity-checking on the remote 731 * address (in mbuf 'nam') and then determine local host address 732 * (i.e., which interface) to use to access that remote host. 733 * 734 * This preserves definition of in_pcbconnect(), while supporting a 735 * slightly different version for T/TCP. (This is more than 736 * a bit of a kludge, but cleaning up the internal interfaces would 737 * have forced minor changes in every protocol). 
738 */ 739 int 740 in_pcbladdr_find(struct inpcb *inp, struct sockaddr *nam, 741 struct sockaddr_in **plocal_sin, struct thread *td, int find) 742 { 743 struct in_ifaddr *ia; 744 struct ucred *cred = NULL; 745 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 746 struct sockaddr *jsin; 747 int jailed = 0, alloc_route = 0; 748 749 if (nam->sa_len != sizeof *sin) 750 return (EINVAL); 751 if (sin->sin_family != AF_INET) 752 return (EAFNOSUPPORT); 753 if (sin->sin_port == 0) 754 return (EADDRNOTAVAIL); 755 if (td && td->td_proc && td->td_proc->p_ucred) 756 cred = td->td_proc->p_ucred; 757 if (cred && cred->cr_prison) 758 jailed = 1; 759 if (!TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) { 760 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 761 /* 762 * If the destination address is INADDR_ANY, 763 * use the primary local address. 764 * If the supplied address is INADDR_BROADCAST, 765 * and the primary interface supports broadcast, 766 * choose the broadcast address for that interface. 767 */ 768 if (sin->sin_addr.s_addr == INADDR_ANY) 769 sin->sin_addr = IA_SIN(ia)->sin_addr; 770 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && 771 (ia->ia_ifp->if_flags & IFF_BROADCAST)) 772 sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr; 773 } 774 if (find) { 775 struct route *ro; 776 777 ia = NULL; 778 /* 779 * If route is known or can be allocated now, 780 * our src addr is taken from the i/f, else punt. 781 * Note that we should check the address family of the cached 782 * destination, in case of sharing the cache with IPv6. 
783 */ 784 ro = &inp->inp_route; 785 if (ro->ro_rt && 786 (!(ro->ro_rt->rt_flags & RTF_UP) || 787 ro->ro_dst.sa_family != AF_INET || 788 satosin(&ro->ro_dst)->sin_addr.s_addr != 789 sin->sin_addr.s_addr || 790 inp->inp_socket->so_options & SO_DONTROUTE)) { 791 RTFREE(ro->ro_rt); 792 ro->ro_rt = NULL; 793 } 794 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/ 795 (ro->ro_rt == NULL || 796 ro->ro_rt->rt_ifp == NULL)) { 797 /* No route yet, so try to acquire one */ 798 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 799 ro->ro_dst.sa_family = AF_INET; 800 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 801 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 802 sin->sin_addr; 803 rtalloc(ro); 804 alloc_route = 1; 805 } 806 /* 807 * If we found a route, use the address 808 * corresponding to the outgoing interface 809 * unless it is the loopback (in case a route 810 * to our address on another net goes to loopback). 811 */ 812 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { 813 if (jailed) { 814 if (jailed_ip(cred->cr_prison, 815 ro->ro_rt->rt_ifa->ifa_addr)) { 816 ia = ifatoia(ro->ro_rt->rt_ifa); 817 } 818 } else { 819 ia = ifatoia(ro->ro_rt->rt_ifa); 820 } 821 } 822 if (ia == NULL) { 823 u_short fport = sin->sin_port; 824 825 sin->sin_port = 0; 826 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 827 if (ia && jailed && !jailed_ip(cred->cr_prison, 828 sintosa(&ia->ia_addr))) 829 ia = NULL; 830 if (ia == NULL) 831 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 832 if (ia && jailed && !jailed_ip(cred->cr_prison, 833 sintosa(&ia->ia_addr))) 834 ia = NULL; 835 sin->sin_port = fport; 836 if (ia == NULL && 837 !TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) 838 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 839 if (ia && jailed && !jailed_ip(cred->cr_prison, 840 sintosa(&ia->ia_addr))) 841 ia = NULL; 842 843 if (!jailed && ia == NULL) 844 goto fail; 845 } 846 /* 847 * If the destination address is multicast and an outgoing 848 * interface has been set as a 
multicast option, use the 849 * address of that interface as our source address. 850 */ 851 if (!jailed && IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 852 inp->inp_moptions != NULL) { 853 struct ip_moptions *imo; 854 struct ifnet *ifp; 855 856 imo = inp->inp_moptions; 857 if (imo->imo_multicast_ifp != NULL) { 858 struct in_ifaddr_container *iac; 859 860 ifp = imo->imo_multicast_ifp; 861 ia = NULL; 862 TAILQ_FOREACH(iac, 863 &in_ifaddrheads[mycpuid], ia_link) { 864 if (iac->ia->ia_ifp == ifp) { 865 ia = iac->ia; 866 break; 867 } 868 } 869 if (ia == NULL) 870 goto fail; 871 } 872 } 873 /* 874 * Don't do pcblookup call here; return interface in plocal_sin 875 * and exit to caller, that will do the lookup. 876 */ 877 if (ia == NULL && jailed) { 878 if ((jsin = prison_get_nonlocal(cred->cr_prison, AF_INET, NULL)) != NULL || 879 (jsin = prison_get_local(cred->cr_prison, AF_INET, NULL)) != NULL) { 880 *plocal_sin = satosin(jsin); 881 } else { 882 /* IPv6 only Jail */ 883 goto fail; 884 } 885 } else { 886 *plocal_sin = &ia->ia_addr; 887 } 888 } 889 return (0); 890 fail: 891 if (alloc_route) { 892 struct route *ro = &inp->inp_route; 893 894 if (ro->ro_rt != NULL) 895 RTFREE(ro->ro_rt); 896 bzero(ro, sizeof(*ro)); 897 } 898 return (EADDRNOTAVAIL); 899 } 900 901 int 902 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, 903 struct sockaddr_in **plocal_sin, struct thread *td) 904 { 905 return in_pcbladdr_find(inp, nam, plocal_sin, td, 906 (inp->inp_laddr.s_addr == INADDR_ANY)); 907 } 908 909 /* 910 * Outer subroutine: 911 * Connect from a socket to a specified address. 912 * Both address and port must be specified in argument sin. 913 * If don't have a local address for this socket yet, 914 * then pick one. 915 */ 916 int 917 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 918 { 919 struct sockaddr_in *if_sin; 920 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 921 int error; 922 923 /* Call inner routine to assign local interface address. 
*/ 924 if ((error = in_pcbladdr(inp, nam, &if_sin, td)) != 0) 925 return (error); 926 927 if (in_pcblookup_hash(inp->inp_cpcbinfo, sin->sin_addr, sin->sin_port, 928 inp->inp_laddr.s_addr ? 929 inp->inp_laddr : if_sin->sin_addr, 930 inp->inp_lport, FALSE, NULL) != NULL) { 931 return (EADDRINUSE); 932 } 933 if (inp->inp_laddr.s_addr == INADDR_ANY) { 934 if (inp->inp_lport == 0) { 935 error = in_pcbbind(inp, NULL, td); 936 if (error) 937 return (error); 938 } 939 inp->inp_laddr = if_sin->sin_addr; 940 } 941 inp->inp_faddr = sin->sin_addr; 942 inp->inp_fport = sin->sin_port; 943 in_pcbinsconnhash(inp); 944 return (0); 945 } 946 947 void 948 in_pcbdisconnect(struct inpcb *inp) 949 { 950 951 inp->inp_faddr.s_addr = INADDR_ANY; 952 inp->inp_fport = 0; 953 in_pcbremconnhash(inp); 954 if (inp->inp_socket->so_state & SS_NOFDREF) 955 in_pcbdetach(inp); 956 } 957 958 void 959 in_pcbdetach(struct inpcb *inp) 960 { 961 struct socket *so = inp->inp_socket; 962 struct inpcbinfo *ipi = inp->inp_pcbinfo; 963 964 #ifdef IPSEC 965 ipsec4_delete_pcbpolicy(inp); 966 #endif /*IPSEC*/ 967 inp->inp_gencnt = ++ipi->ipi_gencnt; 968 KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); 969 in_pcbremlists(inp); 970 so->so_pcb = NULL; 971 sofree(so); /* remove pcb ref */ 972 if (inp->inp_options) 973 m_free(inp->inp_options); 974 if (inp->inp_route.ro_rt) 975 rtfree(inp->inp_route.ro_rt); 976 ip_freemoptions(inp->inp_moptions); 977 inp->inp_vflag = 0; 978 kfree(inp, M_PCB); 979 } 980 981 /* 982 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 983 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 984 * in struct pr_usrreqs, so that protocols can just reference then directly 985 * without the need for a wrapper function. The socket must have a valid 986 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 987 * except through a kernel programming error, so it is acceptable to panic 988 * (or in this case trap) if the PCB is invalid. 
(Actually, we don't trap 989 * because there actually /is/ a programming error somewhere... XXX) 990 */ 991 int 992 in_setsockaddr(struct socket *so, struct sockaddr **nam) 993 { 994 struct inpcb *inp; 995 struct sockaddr_in *sin; 996 997 /* 998 * Do the malloc first in case it blocks. 999 */ 1000 sin = kmalloc(sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); 1001 sin->sin_family = AF_INET; 1002 sin->sin_len = sizeof *sin; 1003 1004 crit_enter(); 1005 inp = so->so_pcb; 1006 if (!inp) { 1007 crit_exit(); 1008 kfree(sin, M_SONAME); 1009 return (ECONNRESET); 1010 } 1011 sin->sin_port = inp->inp_lport; 1012 sin->sin_addr = inp->inp_laddr; 1013 crit_exit(); 1014 1015 *nam = (struct sockaddr *)sin; 1016 return (0); 1017 } 1018 1019 void 1020 in_setsockaddr_dispatch(netmsg_t msg) 1021 { 1022 int error; 1023 1024 error = in_setsockaddr(msg->base.nm_so, msg->peeraddr.nm_nam); 1025 lwkt_replymsg(&msg->lmsg, error); 1026 } 1027 1028 int 1029 in_setpeeraddr(struct socket *so, struct sockaddr **nam) 1030 { 1031 struct inpcb *inp; 1032 struct sockaddr_in *sin; 1033 1034 /* 1035 * Do the malloc first in case it blocks. 
1036 */ 1037 sin = kmalloc(sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); 1038 sin->sin_family = AF_INET; 1039 sin->sin_len = sizeof *sin; 1040 1041 crit_enter(); 1042 inp = so->so_pcb; 1043 if (!inp) { 1044 crit_exit(); 1045 kfree(sin, M_SONAME); 1046 return (ECONNRESET); 1047 } 1048 sin->sin_port = inp->inp_fport; 1049 sin->sin_addr = inp->inp_faddr; 1050 crit_exit(); 1051 1052 *nam = (struct sockaddr *)sin; 1053 return (0); 1054 } 1055 1056 void 1057 in_setpeeraddr_dispatch(netmsg_t msg) 1058 { 1059 int error; 1060 1061 error = in_setpeeraddr(msg->base.nm_so, msg->peeraddr.nm_nam); 1062 lwkt_replymsg(&msg->lmsg, error); 1063 } 1064 1065 void 1066 in_pcbnotifyall(struct inpcbhead *head, struct in_addr faddr, int err, 1067 void (*notify)(struct inpcb *, int)) 1068 { 1069 struct inpcb *inp, *ninp; 1070 1071 /* 1072 * note: if INP_PLACEMARKER is set we must ignore the rest of 1073 * the structure and skip it. 1074 */ 1075 crit_enter(); 1076 LIST_FOREACH_MUTABLE(inp, head, inp_list, ninp) { 1077 if (inp->inp_flags & INP_PLACEMARKER) 1078 continue; 1079 #ifdef INET6 1080 if (!(inp->inp_vflag & INP_IPV4)) 1081 continue; 1082 #endif 1083 if (inp->inp_faddr.s_addr != faddr.s_addr || 1084 inp->inp_socket == NULL) 1085 continue; 1086 (*notify)(inp, err); /* can remove inp from list! */ 1087 } 1088 crit_exit(); 1089 } 1090 1091 void 1092 in_pcbpurgeif0(struct inpcb *head, struct ifnet *ifp) 1093 { 1094 struct inpcb *inp; 1095 struct ip_moptions *imo; 1096 int i, gap; 1097 1098 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) { 1099 if (inp->inp_flags & INP_PLACEMARKER) 1100 continue; 1101 imo = inp->inp_moptions; 1102 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { 1103 /* 1104 * Unselect the outgoing interface if it is being 1105 * detached. 1106 */ 1107 if (imo->imo_multicast_ifp == ifp) 1108 imo->imo_multicast_ifp = NULL; 1109 1110 /* 1111 * Drop multicast group membership if we joined 1112 * through the interface being detached. 
1113 */ 1114 for (i = 0, gap = 0; i < imo->imo_num_memberships; 1115 i++) { 1116 if (imo->imo_membership[i]->inm_ifp == ifp) { 1117 in_delmulti(imo->imo_membership[i]); 1118 gap++; 1119 } else if (gap != 0) 1120 imo->imo_membership[i - gap] = 1121 imo->imo_membership[i]; 1122 } 1123 imo->imo_num_memberships -= gap; 1124 } 1125 } 1126 } 1127 1128 /* 1129 * Check for alternatives when higher level complains 1130 * about service problems. For now, invalidate cached 1131 * routing information. If the route was created dynamically 1132 * (by a redirect), time to try a default gateway again. 1133 */ 1134 void 1135 in_losing(struct inpcb *inp) 1136 { 1137 struct rtentry *rt; 1138 struct rt_addrinfo rtinfo; 1139 1140 if ((rt = inp->inp_route.ro_rt)) { 1141 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 1142 rtinfo.rti_info[RTAX_DST] = rt_key(rt); 1143 rtinfo.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1144 rtinfo.rti_info[RTAX_NETMASK] = rt_mask(rt); 1145 rtinfo.rti_flags = rt->rt_flags; 1146 rt_missmsg(RTM_LOSING, &rtinfo, rt->rt_flags, 0); 1147 if (rt->rt_flags & RTF_DYNAMIC) { 1148 rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, 1149 rt_mask(rt), rt->rt_flags, NULL); 1150 } 1151 inp->inp_route.ro_rt = NULL; 1152 rtfree(rt); 1153 /* 1154 * A new route can be allocated 1155 * the next time output is attempted. 1156 */ 1157 } 1158 } 1159 1160 /* 1161 * After a routing change, flush old routing 1162 * and allocate a (hopefully) better one. 1163 */ 1164 void 1165 in_rtchange(struct inpcb *inp, int err) 1166 { 1167 if (inp->inp_route.ro_rt) { 1168 rtfree(inp->inp_route.ro_rt); 1169 inp->inp_route.ro_rt = NULL; 1170 /* 1171 * A new route can be allocated the next time 1172 * output is attempted. 1173 */ 1174 } 1175 } 1176 1177 /* 1178 * Lookup a PCB based on the local address and port. 
1179 */ 1180 struct inpcb * 1181 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, 1182 u_int lport_arg, int wild_okay, struct ucred *cred) 1183 { 1184 struct inpcb *inp; 1185 int matchwild = 3, wildcard; 1186 u_short lport = lport_arg; 1187 struct inpcbporthead *porthash; 1188 struct inpcbport *phd; 1189 struct inpcb *match = NULL; 1190 1191 /* 1192 * If the porthashbase is shared across several cpus we need 1193 * to lock. 1194 */ 1195 if (pcbinfo->porttoken) 1196 lwkt_gettoken(pcbinfo->porttoken); 1197 1198 /* 1199 * Best fit PCB lookup. 1200 * 1201 * First see if this local port is in use by looking on the 1202 * port hash list. 1203 */ 1204 porthash = &pcbinfo->porthashbase[ 1205 INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; 1206 LIST_FOREACH(phd, porthash, phd_hash) { 1207 if (phd->phd_port == lport) 1208 break; 1209 } 1210 if (phd != NULL) { 1211 /* 1212 * Port is in use by one or more PCBs. Look for best 1213 * fit. 1214 */ 1215 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 1216 wildcard = 0; 1217 #ifdef INET6 1218 if ((inp->inp_vflag & INP_IPV4) == 0) 1219 continue; 1220 #endif 1221 if (inp->inp_faddr.s_addr != INADDR_ANY) 1222 wildcard++; 1223 if (inp->inp_laddr.s_addr != INADDR_ANY) { 1224 if (laddr.s_addr == INADDR_ANY) 1225 wildcard++; 1226 else if (inp->inp_laddr.s_addr != laddr.s_addr) 1227 continue; 1228 } else { 1229 if (laddr.s_addr != INADDR_ANY) 1230 wildcard++; 1231 } 1232 if (wildcard && !wild_okay) 1233 continue; 1234 if (wildcard < matchwild && 1235 (cred == NULL || 1236 cred->cr_prison == 1237 inp->inp_socket->so_cred->cr_prison)) { 1238 match = inp; 1239 matchwild = wildcard; 1240 if (matchwild == 0) { 1241 break; 1242 } 1243 } 1244 } 1245 } 1246 if (pcbinfo->porttoken) 1247 lwkt_reltoken(pcbinfo->porttoken); 1248 return (match); 1249 } 1250 1251 struct inpcb * 1252 in_pcblocalgroup_last(const struct inpcbinfo *pcbinfo, 1253 const struct inpcb *inp) 1254 { 1255 const struct inp_localgrphead *hdr; 1256 const 
struct inp_localgroup *grp; 1257 int i; 1258 1259 if (pcbinfo->localgrphashbase == NULL) 1260 return NULL; 1261 1262 hdr = &pcbinfo->localgrphashbase[ 1263 INP_PCBLOCALGRPHASH(inp->inp_lport, pcbinfo->localgrphashmask)]; 1264 1265 LIST_FOREACH(grp, hdr, il_list) { 1266 if (grp->il_vflag == inp->inp_vflag && 1267 grp->il_lport == inp->inp_lport && 1268 memcmp(&grp->il_dependladdr, 1269 &inp->inp_inc.inc_ie.ie_dependladdr, 1270 sizeof(grp->il_dependladdr)) == 0) { 1271 break; 1272 } 1273 } 1274 if (grp == NULL || grp->il_inpcnt == 1) 1275 return NULL; 1276 1277 KASSERT(grp->il_inpcnt >= 2, 1278 ("invalid localgroup inp count %d", grp->il_inpcnt)); 1279 for (i = 0; i < grp->il_inpcnt; ++i) { 1280 if (grp->il_inp[i] == inp) { 1281 int last = grp->il_inpcnt - 1; 1282 1283 if (i == last) 1284 last = grp->il_inpcnt - 2; 1285 return grp->il_inp[last]; 1286 } 1287 } 1288 return NULL; 1289 } 1290 1291 static struct inpcb * 1292 inp_localgroup_lookup(const struct inpcbinfo *pcbinfo, 1293 struct in_addr laddr, uint16_t lport, uint32_t pkt_hash) 1294 { 1295 struct inpcb *local_wild = NULL; 1296 const struct inp_localgrphead *hdr; 1297 const struct inp_localgroup *grp; 1298 1299 hdr = &pcbinfo->localgrphashbase[ 1300 INP_PCBLOCALGRPHASH(lport, pcbinfo->localgrphashmask)]; 1301 #ifdef INP_LOCALGROUP_HASHTHR 1302 pkt_hash >>= ncpus2_shift; 1303 #endif 1304 1305 /* 1306 * Order of socket selection: 1307 * 1. non-wild. 1308 * 2. wild. 
1309 * 1310 * NOTE: 1311 * - Local group does not contain jailed sockets 1312 * - Local group does not contain IPv4 mapped INET6 wild sockets 1313 */ 1314 LIST_FOREACH(grp, hdr, il_list) { 1315 #ifdef INET6 1316 if (!(grp->il_vflag & INP_IPV4)) 1317 continue; 1318 #endif 1319 if (grp->il_lport == lport) { 1320 int idx; 1321 1322 #ifdef INP_LOCALGROUP_HASHTHR 1323 idx = pkt_hash / grp->il_factor; 1324 KASSERT(idx < grp->il_inpcnt && idx >= 0, 1325 ("invalid hash %04x, cnt %d or fact %d", 1326 pkt_hash, grp->il_inpcnt, grp->il_factor)); 1327 #else 1328 /* 1329 * Modulo-N is used here, which greatly reduces 1330 * completion queue token contention, thus more 1331 * cpu time is saved. 1332 */ 1333 idx = pkt_hash % grp->il_inpcnt; 1334 #endif 1335 1336 if (grp->il_laddr.s_addr == laddr.s_addr) 1337 return grp->il_inp[idx]; 1338 else if (grp->il_laddr.s_addr == INADDR_ANY) 1339 local_wild = grp->il_inp[idx]; 1340 } 1341 } 1342 if (local_wild != NULL) 1343 return local_wild; 1344 return NULL; 1345 } 1346 1347 /* 1348 * Lookup PCB in hash list. 1349 */ 1350 struct inpcb * 1351 in_pcblookup_pkthash(struct inpcbinfo *pcbinfo, struct in_addr faddr, 1352 u_int fport_arg, struct in_addr laddr, u_int lport_arg, 1353 boolean_t wildcard, struct ifnet *ifp, const struct mbuf *m) 1354 { 1355 struct inpcbhead *head; 1356 struct inpcb *inp, *jinp=NULL; 1357 u_short fport = fport_arg, lport = lport_arg; 1358 1359 /* 1360 * First look for an exact match. 
1361 */ 1362 head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport, 1363 laddr.s_addr, lport, pcbinfo->hashmask)]; 1364 LIST_FOREACH(inp, head, inp_hash) { 1365 #ifdef INET6 1366 if (!(inp->inp_vflag & INP_IPV4)) 1367 continue; 1368 #endif 1369 if (in_hosteq(inp->inp_faddr, faddr) && 1370 in_hosteq(inp->inp_laddr, laddr) && 1371 inp->inp_fport == fport && inp->inp_lport == lport) { 1372 /* found */ 1373 if (inp->inp_socket == NULL || 1374 inp->inp_socket->so_cred->cr_prison == NULL) { 1375 return (inp); 1376 } else { 1377 if (jinp == NULL) 1378 jinp = inp; 1379 } 1380 } 1381 } 1382 if (jinp != NULL) 1383 return (jinp); 1384 if (wildcard) { 1385 struct inpcb *local_wild = NULL; 1386 struct inpcb *jinp_wild = NULL; 1387 #ifdef INET6 1388 struct inpcb *local_wild_mapped = NULL; 1389 #endif 1390 struct inpcontainer *ic; 1391 struct inpcontainerhead *chead; 1392 struct sockaddr_in jsin; 1393 struct ucred *cred; 1394 1395 /* 1396 * Check local group first 1397 */ 1398 if (pcbinfo->localgrphashbase != NULL && 1399 m != NULL && (m->m_flags & M_HASH) && 1400 !(ifp && ifp->if_type == IFT_FAITH)) { 1401 inp = inp_localgroup_lookup(pcbinfo, 1402 laddr, lport, m->m_pkthdr.hash); 1403 if (inp != NULL) 1404 return inp; 1405 } 1406 1407 /* 1408 * Order of socket selection: 1409 * 1. non-jailed, non-wild. 1410 * 2. non-jailed, wild. 1411 * 3. jailed, non-wild. 1412 * 4. jailed, wild. 
1413 */ 1414 jsin.sin_family = AF_INET; 1415 chead = &pcbinfo->wildcardhashbase[ 1416 INP_PCBWILDCARDHASH(lport, pcbinfo->wildcardhashmask)]; 1417 LIST_FOREACH(ic, chead, ic_list) { 1418 inp = ic->ic_inp; 1419 jsin.sin_addr.s_addr = laddr.s_addr; 1420 #ifdef INET6 1421 if (!(inp->inp_vflag & INP_IPV4)) 1422 continue; 1423 #endif 1424 if (inp->inp_socket != NULL) 1425 cred = inp->inp_socket->so_cred; 1426 else 1427 cred = NULL; 1428 if (cred != NULL && jailed(cred)) { 1429 if (jinp != NULL) 1430 continue; 1431 else 1432 if (!jailed_ip(cred->cr_prison, 1433 (struct sockaddr *)&jsin)) 1434 continue; 1435 } 1436 if (inp->inp_lport == lport) { 1437 if (ifp && ifp->if_type == IFT_FAITH && 1438 !(inp->inp_flags & INP_FAITH)) 1439 continue; 1440 if (inp->inp_laddr.s_addr == laddr.s_addr) { 1441 if (cred != NULL && jailed(cred)) 1442 jinp = inp; 1443 else 1444 return (inp); 1445 } 1446 if (inp->inp_laddr.s_addr == INADDR_ANY) { 1447 #ifdef INET6 1448 if (INP_CHECK_SOCKAF(inp->inp_socket, 1449 AF_INET6)) 1450 local_wild_mapped = inp; 1451 else 1452 #endif 1453 if (cred != NULL && 1454 jailed(cred)) 1455 jinp_wild = inp; 1456 else 1457 local_wild = inp; 1458 } 1459 } 1460 } 1461 if (local_wild != NULL) 1462 return (local_wild); 1463 #ifdef INET6 1464 if (local_wild_mapped != NULL) 1465 return (local_wild_mapped); 1466 #endif 1467 if (jinp != NULL) 1468 return (jinp); 1469 return (jinp_wild); 1470 } 1471 1472 /* 1473 * Not found. 1474 */ 1475 return (NULL); 1476 } 1477 1478 struct inpcb * 1479 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, 1480 u_int fport_arg, struct in_addr laddr, u_int lport_arg, 1481 boolean_t wildcard, struct ifnet *ifp) 1482 { 1483 return in_pcblookup_pkthash(pcbinfo, faddr, fport_arg, 1484 laddr, lport_arg, wildcard, ifp, NULL); 1485 } 1486 1487 /* 1488 * Insert PCB into connection hash table. 
1489 */ 1490 void 1491 in_pcbinsconnhash(struct inpcb *inp) 1492 { 1493 struct inpcbinfo *pcbinfo = inp->inp_cpcbinfo; 1494 struct inpcbhead *bucket; 1495 u_int32_t hashkey_faddr, hashkey_laddr; 1496 1497 #ifdef INET6 1498 if (inp->inp_vflag & INP_IPV6) { 1499 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */; 1500 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */; 1501 } else { 1502 #endif 1503 hashkey_faddr = inp->inp_faddr.s_addr; 1504 hashkey_laddr = inp->inp_laddr.s_addr; 1505 #ifdef INET6 1506 } 1507 #endif 1508 1509 KASSERT(!(inp->inp_flags & INP_WILDCARD), 1510 ("already on wildcardhash")); 1511 KASSERT(!(inp->inp_flags & INP_CONNECTED), 1512 ("already on connhash")); 1513 inp->inp_flags |= INP_CONNECTED; 1514 1515 /* 1516 * Insert into the connection hash table. 1517 */ 1518 bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr, 1519 inp->inp_fport, hashkey_laddr, inp->inp_lport, pcbinfo->hashmask)]; 1520 LIST_INSERT_HEAD(bucket, inp, inp_hash); 1521 } 1522 1523 /* 1524 * Remove PCB from connection hash table. 1525 */ 1526 void 1527 in_pcbremconnhash(struct inpcb *inp) 1528 { 1529 KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected")); 1530 LIST_REMOVE(inp, inp_hash); 1531 inp->inp_flags &= ~INP_CONNECTED; 1532 } 1533 1534 /* 1535 * Insert PCB into port hash table. 1536 */ 1537 int 1538 in_pcbinsporthash(struct inpcb *inp) 1539 { 1540 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1541 struct inpcbporthead *pcbporthash; 1542 struct inpcbport *phd; 1543 1544 /* 1545 * If the porthashbase is shared across several cpus we need 1546 * to lock. 1547 */ 1548 if (pcbinfo->porttoken) 1549 lwkt_gettoken(pcbinfo->porttoken); 1550 1551 /* 1552 * Insert into the port hash table. 1553 */ 1554 pcbporthash = &pcbinfo->porthashbase[ 1555 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; 1556 1557 /* Go through port list and look for a head for this lport. 
*/ 1558 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1559 if (phd->phd_port == inp->inp_lport) 1560 break; 1561 } 1562 1563 /* If none exists, malloc one and tack it on. */ 1564 if (phd == NULL) { 1565 KKASSERT(pcbinfo->portsave != NULL); 1566 phd = pcbinfo->portsave; 1567 pcbinfo->portsave = NULL; 1568 phd->phd_port = inp->inp_lport; 1569 LIST_INIT(&phd->phd_pcblist); 1570 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1571 } 1572 1573 inp->inp_phd = phd; 1574 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1575 1576 if (pcbinfo->porttoken) 1577 lwkt_reltoken(pcbinfo->porttoken); 1578 if (pcbinfo->portsave == NULL) { 1579 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), 1580 M_PCB, M_INTWAIT | M_ZERO); 1581 } 1582 return (0); 1583 } 1584 1585 static struct inp_localgroup * 1586 inp_localgroup_alloc(struct inp_localgrphead *hdr, u_char vflag, 1587 uint16_t port, const union in_dependaddr *addr, int size) 1588 { 1589 struct inp_localgroup *grp; 1590 1591 grp = kmalloc(__offsetof(struct inp_localgroup, il_inp[size]), 1592 M_TEMP, M_INTWAIT | M_ZERO); 1593 grp->il_vflag = vflag; 1594 grp->il_lport = port; 1595 grp->il_dependladdr = *addr; 1596 grp->il_inpsiz = size; 1597 1598 LIST_INSERT_HEAD(hdr, grp, il_list); 1599 1600 return grp; 1601 } 1602 1603 static void 1604 inp_localgroup_free(struct inp_localgroup *grp) 1605 { 1606 LIST_REMOVE(grp, il_list); 1607 kfree(grp, M_TEMP); 1608 } 1609 1610 static struct inp_localgroup * 1611 inp_localgroup_resize(struct inp_localgrphead *hdr, 1612 struct inp_localgroup *old_grp, int size) 1613 { 1614 struct inp_localgroup *grp; 1615 int i; 1616 1617 grp = inp_localgroup_alloc(hdr, old_grp->il_vflag, 1618 old_grp->il_lport, &old_grp->il_dependladdr, size); 1619 1620 KASSERT(old_grp->il_inpcnt < grp->il_inpsiz, 1621 ("invalid new local group size %d and old local group count %d", 1622 grp->il_inpsiz, old_grp->il_inpcnt)); 1623 for (i = 0; i < old_grp->il_inpcnt; ++i) 1624 grp->il_inp[i] = old_grp->il_inp[i]; 1625 
grp->il_inpcnt = old_grp->il_inpcnt; 1626 grp->il_factor = old_grp->il_factor; 1627 1628 inp_localgroup_free(old_grp); 1629 1630 return grp; 1631 } 1632 1633 static void 1634 inp_localgroup_factor(struct inp_localgroup *grp) 1635 { 1636 grp->il_factor = 1637 ((uint32_t)(0xffff >> ncpus2_shift) / grp->il_inpcnt) + 1; 1638 KASSERT(grp->il_factor != 0, ("invalid local group factor, " 1639 "ncpus2_shift %d, inpcnt %d", ncpus2_shift, grp->il_inpcnt)); 1640 } 1641 1642 static void 1643 in_pcbinslocalgrphash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1644 { 1645 struct inp_localgrphead *hdr; 1646 struct inp_localgroup *grp; 1647 struct ucred *cred; 1648 1649 if (pcbinfo->localgrphashbase == NULL) 1650 return; 1651 1652 /* 1653 * XXX don't allow jailed socket to join local group 1654 */ 1655 if (inp->inp_socket != NULL) 1656 cred = inp->inp_socket->so_cred; 1657 else 1658 cred = NULL; 1659 if (cred != NULL && jailed(cred)) 1660 return; 1661 1662 #ifdef INET6 1663 /* 1664 * XXX don't allow IPv4 mapped INET6 wild socket 1665 */ 1666 if ((inp->inp_vflag & INP_IPV4) && 1667 inp->inp_laddr.s_addr == INADDR_ANY && 1668 INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) 1669 return; 1670 #endif 1671 1672 hdr = &pcbinfo->localgrphashbase[ 1673 INP_PCBLOCALGRPHASH(inp->inp_lport, pcbinfo->localgrphashmask)]; 1674 1675 LIST_FOREACH(grp, hdr, il_list) { 1676 if (grp->il_vflag == inp->inp_vflag && 1677 grp->il_lport == inp->inp_lport && 1678 memcmp(&grp->il_dependladdr, 1679 &inp->inp_inc.inc_ie.ie_dependladdr, 1680 sizeof(grp->il_dependladdr)) == 0) { 1681 break; 1682 } 1683 } 1684 if (grp == NULL) { 1685 /* Create new local group */ 1686 grp = inp_localgroup_alloc(hdr, inp->inp_vflag, 1687 inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr, 1688 INP_LOCALGROUP_SIZMIN); 1689 } else if (grp->il_inpcnt == grp->il_inpsiz) { 1690 if (grp->il_inpsiz >= INP_LOCALGROUP_SIZMAX) { 1691 static int limit_logged = 0; 1692 1693 if (!limit_logged) { 1694 limit_logged = 1; 1695 kprintf("local 
group port %d, " 1696 "limit reached\n", ntohs(grp->il_lport)); 1697 } 1698 return; 1699 } 1700 1701 /* Expand this local group */ 1702 grp = inp_localgroup_resize(hdr, grp, grp->il_inpsiz * 2); 1703 } 1704 1705 KASSERT(grp->il_inpcnt < grp->il_inpsiz, 1706 ("invalid local group size %d and count %d", 1707 grp->il_inpsiz, grp->il_inpcnt)); 1708 grp->il_inp[grp->il_inpcnt] = inp; 1709 grp->il_inpcnt++; 1710 inp_localgroup_factor(grp); 1711 } 1712 1713 void 1714 in_pcbinswildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1715 { 1716 struct inpcontainer *ic; 1717 struct inpcontainerhead *bucket; 1718 1719 in_pcbinslocalgrphash_oncpu(inp, pcbinfo); 1720 1721 bucket = &pcbinfo->wildcardhashbase[ 1722 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1723 1724 ic = kmalloc(sizeof(struct inpcontainer), M_TEMP, M_INTWAIT); 1725 ic->ic_inp = inp; 1726 LIST_INSERT_HEAD(bucket, ic, ic_list); 1727 } 1728 1729 /* 1730 * Insert PCB into wildcard hash table. 1731 */ 1732 void 1733 in_pcbinswildcardhash(struct inpcb *inp) 1734 { 1735 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1736 1737 KASSERT(!(inp->inp_flags & INP_CONNECTED), 1738 ("already on connhash")); 1739 KASSERT(!(inp->inp_flags & INP_WILDCARD), 1740 ("already on wildcardhash")); 1741 inp->inp_flags |= INP_WILDCARD; 1742 1743 in_pcbinswildcardhash_oncpu(inp, pcbinfo); 1744 } 1745 1746 static void 1747 in_pcbremlocalgrphash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1748 { 1749 struct inp_localgrphead *hdr; 1750 struct inp_localgroup *grp; 1751 1752 if (pcbinfo->localgrphashbase == NULL) 1753 return; 1754 1755 hdr = &pcbinfo->localgrphashbase[ 1756 INP_PCBLOCALGRPHASH(inp->inp_lport, pcbinfo->localgrphashmask)]; 1757 1758 LIST_FOREACH(grp, hdr, il_list) { 1759 int i; 1760 1761 for (i = 0; i < grp->il_inpcnt; ++i) { 1762 if (grp->il_inp[i] != inp) 1763 continue; 1764 1765 if (grp->il_inpcnt == 1) { 1766 /* Free this local group */ 1767 inp_localgroup_free(grp); 1768 } else { 1769 /* 
Pull up inpcbs */ 1770 for (; i + 1 < grp->il_inpcnt; ++i) 1771 grp->il_inp[i] = grp->il_inp[i + 1]; 1772 grp->il_inpcnt--; 1773 inp_localgroup_factor(grp); 1774 1775 if (grp->il_inpsiz > INP_LOCALGROUP_SIZMIN && 1776 grp->il_inpcnt <= (grp->il_inpsiz / 4)) { 1777 /* Shrink this local group */ 1778 grp = inp_localgroup_resize(hdr, grp, 1779 grp->il_inpsiz / 2); 1780 } 1781 } 1782 return; 1783 } 1784 } 1785 } 1786 1787 void 1788 in_pcbremwildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1789 { 1790 struct inpcontainer *ic; 1791 struct inpcontainerhead *head; 1792 1793 in_pcbremlocalgrphash_oncpu(inp, pcbinfo); 1794 1795 /* find bucket */ 1796 head = &pcbinfo->wildcardhashbase[ 1797 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1798 1799 LIST_FOREACH(ic, head, ic_list) { 1800 if (ic->ic_inp == inp) 1801 goto found; 1802 } 1803 return; /* not found! */ 1804 1805 found: 1806 LIST_REMOVE(ic, ic_list); /* remove container from bucket chain */ 1807 kfree(ic, M_TEMP); /* deallocate container */ 1808 } 1809 1810 /* 1811 * Remove PCB from wildcard hash table. 1812 */ 1813 void 1814 in_pcbremwildcardhash(struct inpcb *inp) 1815 { 1816 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1817 1818 KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard")); 1819 in_pcbremwildcardhash_oncpu(inp, pcbinfo); 1820 inp->inp_flags &= ~INP_WILDCARD; 1821 } 1822 1823 /* 1824 * Remove PCB from various lists. 
1825 */ 1826 void 1827 in_pcbremlists(struct inpcb *inp) 1828 { 1829 struct inpcbinfo *pcbinfo; 1830 1831 if (inp->inp_lport) { 1832 struct inpcbport *phd; 1833 1834 pcbinfo = inp->inp_pcbinfo; 1835 if (pcbinfo->porttoken) 1836 lwkt_gettoken(pcbinfo->porttoken); 1837 1838 phd = inp->inp_phd; 1839 LIST_REMOVE(inp, inp_portlist); 1840 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1841 LIST_REMOVE(phd, phd_hash); 1842 kfree(phd, M_PCB); 1843 } 1844 if (pcbinfo->porttoken) 1845 lwkt_reltoken(pcbinfo->porttoken); 1846 } 1847 if (inp->inp_flags & INP_WILDCARD) { 1848 in_pcbremwildcardhash(inp); 1849 } else if (inp->inp_flags & INP_CONNECTED) { 1850 in_pcbremconnhash(inp); 1851 } 1852 LIST_REMOVE(inp, inp_list); 1853 inp->inp_pcbinfo->ipi_count--; 1854 } 1855 1856 int 1857 prison_xinpcb(struct thread *td, struct inpcb *inp) 1858 { 1859 struct ucred *cr; 1860 1861 if (td->td_proc == NULL) 1862 return (0); 1863 cr = td->td_proc->p_ucred; 1864 if (cr->cr_prison == NULL) 1865 return (0); 1866 if (inp->inp_socket && inp->inp_socket->so_cred && 1867 inp->inp_socket->so_cred->cr_prison && 1868 cr->cr_prison == inp->inp_socket->so_cred->cr_prison) 1869 return (0); 1870 return (1); 1871 } 1872 1873 int 1874 in_pcblist_global(SYSCTL_HANDLER_ARGS) 1875 { 1876 struct inpcbinfo *pcbinfo = arg1; 1877 struct inpcb *inp, *marker; 1878 struct xinpcb xi; 1879 int error, i, n; 1880 1881 /* 1882 * The process of preparing the TCB list is too time-consuming and 1883 * resource-intensive to repeat twice on every request. 1884 */ 1885 if (req->oldptr == NULL) { 1886 n = pcbinfo->ipi_count; 1887 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1888 return 0; 1889 } 1890 1891 if (req->newptr != NULL) 1892 return EPERM; 1893 1894 /* 1895 * OK, now we're committed to doing something. Re-fetch ipi_count 1896 * after obtaining the generation count. 
1897 */ 1898 n = pcbinfo->ipi_count; 1899 1900 marker = kmalloc(sizeof(struct inpcb), M_TEMP, M_WAITOK|M_ZERO); 1901 marker->inp_flags |= INP_PLACEMARKER; 1902 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 1903 1904 i = 0; 1905 error = 0; 1906 1907 while ((inp = LIST_NEXT(marker, inp_list)) != NULL && i < n) { 1908 LIST_REMOVE(marker, inp_list); 1909 LIST_INSERT_AFTER(inp, marker, inp_list); 1910 1911 if (inp->inp_flags & INP_PLACEMARKER) 1912 continue; 1913 if (prison_xinpcb(req->td, inp)) 1914 continue; 1915 bzero(&xi, sizeof xi); 1916 xi.xi_len = sizeof xi; 1917 bcopy(inp, &xi.xi_inp, sizeof *inp); 1918 if (inp->inp_socket) 1919 sotoxsocket(inp->inp_socket, &xi.xi_socket); 1920 if ((error = SYSCTL_OUT(req, &xi, sizeof xi)) != 0) 1921 break; 1922 ++i; 1923 } 1924 LIST_REMOVE(marker, inp_list); 1925 if (error == 0 && i < n) { 1926 bzero(&xi, sizeof xi); 1927 xi.xi_len = sizeof xi; 1928 while (i < n) { 1929 error = SYSCTL_OUT(req, &xi, sizeof xi); 1930 ++i; 1931 } 1932 } 1933 kfree(marker, M_TEMP); 1934 return(error); 1935 } 1936 1937 int 1938 in_pcblist_global_nomarker(SYSCTL_HANDLER_ARGS, struct xinpcb **xi0, int *nxi0) 1939 { 1940 struct inpcbinfo *pcbinfo = arg1; 1941 struct inpcb *inp; 1942 struct xinpcb *xi; 1943 int nxi; 1944 1945 *nxi0 = 0; 1946 *xi0 = NULL; 1947 1948 /* 1949 * The process of preparing the PCB list is too time-consuming and 1950 * resource-intensive to repeat twice on every request. 
1951 */ 1952 if (req->oldptr == NULL) { 1953 int n = pcbinfo->ipi_count; 1954 1955 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1956 return 0; 1957 } 1958 1959 if (req->newptr != NULL) 1960 return EPERM; 1961 1962 if (pcbinfo->ipi_count == 0) 1963 return 0; 1964 1965 nxi = 0; 1966 xi = kmalloc(pcbinfo->ipi_count * sizeof(*xi), M_TEMP, 1967 M_WAITOK | M_ZERO | M_NULLOK); 1968 if (xi == NULL) 1969 return ENOMEM; 1970 1971 LIST_FOREACH(inp, &pcbinfo->pcblisthead, inp_list) { 1972 struct xinpcb *xi_ptr = &xi[nxi]; 1973 1974 if (prison_xinpcb(req->td, inp)) 1975 continue; 1976 1977 xi_ptr->xi_len = sizeof(*xi_ptr); 1978 bcopy(inp, &xi_ptr->xi_inp, sizeof(*inp)); 1979 if (inp->inp_socket) 1980 sotoxsocket(inp->inp_socket, &xi_ptr->xi_socket); 1981 ++nxi; 1982 } 1983 1984 if (nxi == 0) { 1985 kfree(xi, M_TEMP); 1986 return 0; 1987 } 1988 1989 *nxi0 = nxi; 1990 *xi0 = xi; 1991 1992 return 0; 1993 } 1994 1995 void 1996 in_savefaddr(struct socket *so, const struct sockaddr *faddr) 1997 { 1998 struct sockaddr_in *sin; 1999 2000 KASSERT(faddr->sa_family == AF_INET, 2001 ("not AF_INET faddr %d", faddr->sa_family)); 2002 2003 sin = kmalloc(sizeof(*sin), M_SONAME, M_WAITOK | M_ZERO); 2004 sin->sin_family = AF_INET; 2005 sin->sin_len = sizeof(*sin); 2006 sin->sin_port = ((const struct sockaddr_in *)faddr)->sin_port; 2007 sin->sin_addr = ((const struct sockaddr_in *)faddr)->sin_addr; 2008 2009 so->so_faddr = (struct sockaddr *)sin; 2010 } 2011