1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1991, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 67 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $ 68 * $DragonFly: src/sys/netinet/in_pcb.c,v 1.48 2008/11/08 03:38:23 sephe Exp $ 69 */ 70 71 #include "opt_ipsec.h" 72 #include "opt_inet6.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/malloc.h> 77 #include <sys/mbuf.h> 78 #include <sys/domain.h> 79 #include <sys/protosw.h> 80 #include <sys/socket.h> 81 #include <sys/socketvar.h> 82 #include <sys/proc.h> 83 #include <sys/priv.h> 84 #include <sys/jail.h> 85 #include <sys/kernel.h> 86 #include <sys/sysctl.h> 87 88 #include <sys/thread2.h> 89 #include <sys/socketvar2.h> 90 #include <sys/msgport2.h> 91 92 #include <machine/limits.h> 93 94 #include <net/if.h> 95 #include <net/if_types.h> 96 #include <net/route.h> 97 98 #include <netinet/in.h> 99 #include <netinet/in_pcb.h> 100 #include <netinet/in_var.h> 101 #include <netinet/ip_var.h> 102 #ifdef INET6 103 #include <netinet/ip6.h> 104 #include <netinet6/ip6_var.h> 105 #endif /* INET6 */ 106 107 #ifdef IPSEC 108 #include <netinet6/ipsec.h> 109 #include <netproto/key/key.h> 110 #include <netproto/ipsec/esp_var.h> 111 #endif 112 113 #ifdef FAST_IPSEC 114 #if defined(IPSEC) || defined(IPSEC_ESP) 115 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 116 #endif 117 118 #include <netproto/ipsec/ipsec.h> 119 #include <netproto/ipsec/key.h> 120 #define IPSEC 121 #endif /* FAST_IPSEC */ 122 123 struct in_addr zeroin_addr; 124 125 /* 126 * These configure the range of local port addresses assigned to 127 * "unspecified" outgoing connections/packets/whatever. 128 */ 129 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 130 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 131 132 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 133 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 134 135 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 136 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 137 138 #define RANGECHK(var, min, max) \ 139 if ((var) < (min)) { (var) = (min); } \ 140 else if ((var) > (max)) { (var) = (max); } 141 142 int udpencap_enable = 1; /* enabled by default */ 143 int udpencap_port = 4500; /* triggers decapsulation */ 144 145 static int 146 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 147 { 148 int error; 149 150 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); 151 if (!error) { 152 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 153 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 154 155 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 156 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 157 158 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 159 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 160 } 161 return (error); 162 } 163 164 #undef RANGECHK 165 166 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 167 168 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 169 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 170 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 171 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 172 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 173 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 174 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 175 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 176 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 177 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 178 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 179 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 180 181 /* 182 * in_pcb.c: manage the Protocol Control Blocks. 183 * 184 * NOTE: It is assumed that most of these functions will be called from 185 * a critical section. XXX - There are, unfortunately, a few exceptions 186 * to this rule that should be fixed. 187 * 188 * NOTE: The caller should initialize the cpu field to the cpu running the 189 * protocol stack associated with this inpcbinfo. 190 */ 191 192 void 193 in_pcbinfo_init(struct inpcbinfo *pcbinfo) 194 { 195 LIST_INIT(&pcbinfo->pcblisthead); 196 pcbinfo->cpu = -1; 197 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), M_PCB, 198 M_WAITOK | M_ZERO); 199 } 200 201 struct baddynamicports baddynamicports; 202 203 /* 204 * Check if the specified port is invalid for dynamic allocation. 205 */ 206 int 207 in_baddynamic(u_int16_t port, u_int16_t proto) 208 { 209 switch (proto) { 210 case IPPROTO_TCP: 211 return (DP_ISSET(baddynamicports.tcp, port)); 212 case IPPROTO_UDP: 213 #ifdef IPSEC 214 /* Cannot preset this as it is a sysctl */ 215 if (port == udpencap_port) 216 return (1); 217 #endif 218 return (DP_ISSET(baddynamicports.udp, port)); 219 default: 220 return (0); 221 } 222 } 223 224 225 /* 226 * Allocate a PCB and associate it with the socket. 227 */ 228 int 229 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) 230 { 231 struct inpcb *inp; 232 #ifdef IPSEC 233 int error; 234 #endif 235 236 inp = kmalloc(pcbinfo->ipi_size, M_PCB, M_WAITOK|M_ZERO); 237 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 238 inp->inp_pcbinfo = inp->inp_cpcbinfo = pcbinfo; 239 inp->inp_socket = so; 240 #ifdef IPSEC 241 error = ipsec_init_policy(so, &inp->inp_sp); 242 if (error != 0) { 243 kfree(inp, M_PCB); 244 return (error); 245 } 246 #endif 247 #ifdef INET6 248 if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only) 249 inp->inp_flags |= IN6P_IPV6_V6ONLY; 250 if (ip6_auto_flowlabel) 251 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 252 #endif 253 soreference(so); 254 so->so_pcb = inp; 255 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 256 pcbinfo->ipi_count++; 257 return (0); 258 } 259 260 /* 261 * Unlink a pcb with the intention of moving it to another cpu with a 262 * different pcbinfo. While unlinked nothing should attempt to dereference 263 * inp_pcbinfo, NULL it out so we assert if it does. 264 */ 265 void 266 in_pcbunlink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 267 { 268 KKASSERT(inp->inp_pcbinfo == pcbinfo); 269 270 LIST_REMOVE(inp, inp_list); 271 pcbinfo->ipi_count--; 272 inp->inp_pcbinfo = NULL; 273 } 274 275 /* 276 * Relink a pcb into a new pcbinfo. 277 */ 278 void 279 in_pcblink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 280 { 281 KKASSERT(inp->inp_pcbinfo == NULL); 282 inp->inp_pcbinfo = pcbinfo; 283 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 284 pcbinfo->ipi_count++; 285 } 286 287 int 288 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 289 { 290 struct socket *so = inp->inp_socket; 291 struct proc *p = td->td_proc; 292 unsigned short *lastport; 293 struct sockaddr_in *sin; 294 struct sockaddr_in jsin; 295 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 296 struct ucred *cred = NULL; 297 u_short lport = 0; 298 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 299 int error; 300 301 KKASSERT(p); 302 303 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */ 304 return (EADDRNOTAVAIL); 305 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 306 return (EINVAL); /* already bound */ 307 308 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) 309 wild = 1; /* neither SO_REUSEADDR nor SO_REUSEPORT is set */ 310 if (p) 311 cred = p->p_ucred; 312 313 /* 314 * This has to be atomic. If the porthash is shared across multiple 315 * protocol threads (aka tcp) then the token will be non-NULL. 316 */ 317 if (pcbinfo->porttoken) 318 lwkt_gettoken(pcbinfo->porttoken); 319 320 if (nam != NULL) { 321 sin = (struct sockaddr_in *)nam; 322 if (nam->sa_len != sizeof *sin) { 323 error = EINVAL; 324 goto done; 325 } 326 #ifdef notdef 327 /* 328 * We should check the family, but old programs 329 * incorrectly fail to initialize it. 330 */ 331 if (sin->sin_family != AF_INET) { 332 error = EAFNOSUPPORT; 333 goto done; 334 } 335 #endif 336 if (!prison_replace_wildcards(td, nam)) { 337 error = EINVAL; 338 goto done; 339 } 340 lport = sin->sin_port; 341 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 342 /* 343 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 344 * allow complete duplication of binding if 345 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 346 * and a multicast address is bound on both 347 * new and duplicated sockets. 348 */ 349 if (so->so_options & SO_REUSEADDR) 350 reuseport = SO_REUSEADDR | SO_REUSEPORT; 351 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 352 sin->sin_port = 0; /* yech... */ 353 bzero(&sin->sin_zero, sizeof sin->sin_zero); 354 if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL) { 355 error = EADDRNOTAVAIL; 356 goto done; 357 } 358 } 359 if (lport != 0) { 360 struct inpcb *t; 361 362 /* GROSS */ 363 if (ntohs(lport) < IPPORT_RESERVED && 364 cred && 365 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) { 366 error = EACCES; 367 goto done; 368 } 369 if (so->so_cred->cr_uid != 0 && 370 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 371 t = in_pcblookup_local(pcbinfo, 372 sin->sin_addr, 373 lport, 374 INPLOOKUP_WILDCARD, 375 cred); 376 if (t && 377 (!in_nullhost(sin->sin_addr) || 378 !in_nullhost(t->inp_laddr) || 379 (t->inp_socket->so_options & 380 SO_REUSEPORT) == 0) && 381 (so->so_cred->cr_uid != 382 t->inp_socket->so_cred->cr_uid)) { 383 #ifdef INET6 384 if (!in_nullhost(sin->sin_addr) || 385 !in_nullhost(t->inp_laddr) || 386 INP_SOCKAF(so) == 387 INP_SOCKAF(t->inp_socket)) 388 #endif 389 { 390 error = EADDRINUSE; 391 goto done; 392 } 393 } 394 } 395 if (cred && !prison_replace_wildcards(td, nam)) { 396 error = EADDRNOTAVAIL; 397 goto done; 398 } 399 t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, 400 wild, cred); 401 if (t && !(reuseport & t->inp_socket->so_options)) { 402 #ifdef INET6 403 if (!in_nullhost(sin->sin_addr) || 404 !in_nullhost(t->inp_laddr) || 405 INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) 406 #endif 407 { 408 error = EADDRINUSE; 409 goto done; 410 } 411 } 412 } 413 inp->inp_laddr = sin->sin_addr; 414 } 415 if (lport == 0) { 416 ushort first, last; 417 int count; 418 419 jsin.sin_family = AF_INET; 420 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 421 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 422 inp->inp_laddr.s_addr = INADDR_ANY; 423 error = EINVAL; 424 goto done; 425 } 426 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 427 428 inp->inp_flags |= INP_ANONPORT; 429 430 if (inp->inp_flags & INP_HIGHPORT) { 431 first = ipport_hifirstauto; /* sysctl */ 432 last = ipport_hilastauto; 433 lastport = &pcbinfo->lasthi; 434 } else if (inp->inp_flags & INP_LOWPORT) { 435 if (cred && 436 (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 437 inp->inp_laddr.s_addr = INADDR_ANY; 438 goto done; 439 } 440 first = ipport_lowfirstauto; /* 1023 */ 441 last = ipport_lowlastauto; /* 600 */ 442 lastport = &pcbinfo->lastlow; 443 } else { 444 first = ipport_firstauto; /* sysctl */ 445 last = ipport_lastauto; 446 lastport = &pcbinfo->lastport; 447 } 448 /* 449 * Simple check to ensure all ports are not used up causing 450 * a deadlock here. 451 * 452 * We split the two cases (up and down) so that the direction 453 * is not being tested on each round of the loop. 454 */ 455 if (first > last) { 456 /* 457 * counting down 458 */ 459 count = first - last; 460 461 do { 462 if (count-- < 0) { /* completely used? */ 463 inp->inp_laddr.s_addr = INADDR_ANY; 464 error = EADDRNOTAVAIL; 465 goto done; 466 } 467 --*lastport; 468 if (*lastport > first || *lastport < last) 469 *lastport = first; 470 lport = htons(*lastport); 471 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 472 lport, wild, cred)); 473 } else { 474 /* 475 * counting up 476 */ 477 count = last - first; 478 479 do { 480 if (count-- < 0) { /* completely used? */ 481 inp->inp_laddr.s_addr = INADDR_ANY; 482 error = EADDRNOTAVAIL; 483 goto done; 484 } 485 ++*lastport; 486 if (*lastport < first || *lastport > last) 487 *lastport = first; 488 lport = htons(*lastport); 489 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 490 lport, wild, cred)); 491 } 492 } 493 inp->inp_lport = lport; 494 495 jsin.sin_family = AF_INET; 496 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 497 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) { 498 inp->inp_laddr.s_addr = INADDR_ANY; 499 inp->inp_lport = 0; 500 error = EINVAL; 501 goto done; 502 } 503 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 504 505 if (in_pcbinsporthash(inp) != 0) { 506 inp->inp_laddr.s_addr = INADDR_ANY; 507 inp->inp_lport = 0; 508 error = EAGAIN; 509 goto done; 510 } 511 error = 0; 512 done: 513 if (pcbinfo->porttoken) 514 lwkt_reltoken(pcbinfo->porttoken); 515 return error; 516 } 517 518 /* 519 * Transform old in_pcbconnect() into an inner subroutine for new 520 * in_pcbconnect(): Do some validity-checking on the remote 521 * address (in mbuf 'nam') and then determine local host address 522 * (i.e., which interface) to use to access that remote host. 523 * 524 * This preserves definition of in_pcbconnect(), while supporting a 525 * slightly different version for T/TCP. (This is more than 526 * a bit of a kludge, but cleaning up the internal interfaces would 527 * have forced minor changes in every protocol). 528 */ 529 int 530 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, 531 struct sockaddr_in **plocal_sin, struct thread *td) 532 { 533 struct in_ifaddr *ia; 534 struct ucred *cred = NULL; 535 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 536 struct sockaddr *jsin; 537 int jailed = 0, alloc_route = 0; 538 539 if (nam->sa_len != sizeof *sin) 540 return (EINVAL); 541 if (sin->sin_family != AF_INET) 542 return (EAFNOSUPPORT); 543 if (sin->sin_port == 0) 544 return (EADDRNOTAVAIL); 545 if (td && td->td_proc && td->td_proc->p_ucred) 546 cred = td->td_proc->p_ucred; 547 if (cred && cred->cr_prison) 548 jailed = 1; 549 if (!TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) { 550 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 551 /* 552 * If the destination address is INADDR_ANY, 553 * use the primary local address. 554 * If the supplied address is INADDR_BROADCAST, 555 * and the primary interface supports broadcast, 556 * choose the broadcast address for that interface. 557 */ 558 if (sin->sin_addr.s_addr == INADDR_ANY) 559 sin->sin_addr = IA_SIN(ia)->sin_addr; 560 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && 561 (ia->ia_ifp->if_flags & IFF_BROADCAST)) 562 sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr; 563 } 564 if (inp->inp_laddr.s_addr == INADDR_ANY) { 565 struct route *ro; 566 567 ia = NULL; 568 /* 569 * If route is known or can be allocated now, 570 * our src addr is taken from the i/f, else punt. 571 * Note that we should check the address family of the cached 572 * destination, in case of sharing the cache with IPv6. 573 */ 574 ro = &inp->inp_route; 575 if (ro->ro_rt && 576 (!(ro->ro_rt->rt_flags & RTF_UP) || 577 ro->ro_dst.sa_family != AF_INET || 578 satosin(&ro->ro_dst)->sin_addr.s_addr != 579 sin->sin_addr.s_addr || 580 inp->inp_socket->so_options & SO_DONTROUTE)) { 581 RTFREE(ro->ro_rt); 582 ro->ro_rt = NULL; 583 } 584 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/ 585 (ro->ro_rt == NULL || 586 ro->ro_rt->rt_ifp == NULL)) { 587 /* No route yet, so try to acquire one */ 588 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 589 ro->ro_dst.sa_family = AF_INET; 590 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 591 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 592 sin->sin_addr; 593 rtalloc(ro); 594 alloc_route = 1; 595 } 596 /* 597 * If we found a route, use the address 598 * corresponding to the outgoing interface 599 * unless it is the loopback (in case a route 600 * to our address on another net goes to loopback). 601 */ 602 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { 603 if (jailed) { 604 if (jailed_ip(cred->cr_prison, 605 ro->ro_rt->rt_ifa->ifa_addr)) { 606 ia = ifatoia(ro->ro_rt->rt_ifa); 607 } 608 } else { 609 ia = ifatoia(ro->ro_rt->rt_ifa); 610 } 611 } 612 if (ia == NULL) { 613 u_short fport = sin->sin_port; 614 615 sin->sin_port = 0; 616 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 617 if (ia && jailed && !jailed_ip(cred->cr_prison, 618 sintosa(&ia->ia_addr))) 619 ia = NULL; 620 if (ia == NULL) 621 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 622 if (ia && jailed && !jailed_ip(cred->cr_prison, 623 sintosa(&ia->ia_addr))) 624 ia = NULL; 625 sin->sin_port = fport; 626 if (ia == NULL && 627 !TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) 628 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 629 if (ia && jailed && !jailed_ip(cred->cr_prison, 630 sintosa(&ia->ia_addr))) 631 ia = NULL; 632 633 if (!jailed && ia == NULL) 634 goto fail; 635 } 636 /* 637 * If the destination address is multicast and an outgoing 638 * interface has been set as a multicast option, use the 639 * address of that interface as our source address. 640 */ 641 if (!jailed && IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 642 inp->inp_moptions != NULL) { 643 struct ip_moptions *imo; 644 struct ifnet *ifp; 645 646 imo = inp->inp_moptions; 647 if (imo->imo_multicast_ifp != NULL) { 648 struct in_ifaddr_container *iac; 649 650 ifp = imo->imo_multicast_ifp; 651 ia = NULL; 652 TAILQ_FOREACH(iac, 653 &in_ifaddrheads[mycpuid], ia_link) { 654 if (iac->ia->ia_ifp == ifp) { 655 ia = iac->ia; 656 break; 657 } 658 } 659 if (ia == NULL) 660 goto fail; 661 } 662 } 663 /* 664 * Don't do pcblookup call here; return interface in plocal_sin 665 * and exit to caller, that will do the lookup. 666 */ 667 if (ia == NULL && jailed) { 668 if ((jsin = prison_get_nonlocal(cred->cr_prison, AF_INET, NULL)) != NULL || 669 (jsin = prison_get_local(cred->cr_prison, AF_INET, NULL)) != NULL) { 670 *plocal_sin = satosin(jsin); 671 } else { 672 /* IPv6 only Jail */ 673 goto fail; 674 } 675 } else { 676 *plocal_sin = &ia->ia_addr; 677 } 678 } 679 return (0); 680 fail: 681 if (alloc_route) { 682 struct route *ro = &inp->inp_route; 683 684 if (ro->ro_rt != NULL) 685 RTFREE(ro->ro_rt); 686 bzero(ro, sizeof(*ro)); 687 } 688 return (EADDRNOTAVAIL); 689 } 690 691 /* 692 * Outer subroutine: 693 * Connect from a socket to a specified address. 694 * Both address and port must be specified in argument sin. 695 * If don't have a local address for this socket yet, 696 * then pick one. 697 */ 698 int 699 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 700 { 701 struct sockaddr_in *if_sin; 702 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 703 int error; 704 705 /* Call inner routine to assign local interface address. */ 706 if ((error = in_pcbladdr(inp, nam, &if_sin, td)) != 0) 707 return (error); 708 709 if (in_pcblookup_hash(inp->inp_cpcbinfo, sin->sin_addr, sin->sin_port, 710 inp->inp_laddr.s_addr ? 711 inp->inp_laddr : if_sin->sin_addr, 712 inp->inp_lport, FALSE, NULL) != NULL) { 713 return (EADDRINUSE); 714 } 715 if (inp->inp_laddr.s_addr == INADDR_ANY) { 716 if (inp->inp_lport == 0) { 717 error = in_pcbbind(inp, NULL, td); 718 if (error) 719 return (error); 720 } 721 inp->inp_laddr = if_sin->sin_addr; 722 } 723 inp->inp_faddr = sin->sin_addr; 724 inp->inp_fport = sin->sin_port; 725 in_pcbinsconnhash(inp); 726 return (0); 727 } 728 729 void 730 in_pcbdisconnect(struct inpcb *inp) 731 { 732 733 inp->inp_faddr.s_addr = INADDR_ANY; 734 inp->inp_fport = 0; 735 in_pcbremconnhash(inp); 736 if (inp->inp_socket->so_state & SS_NOFDREF) 737 in_pcbdetach(inp); 738 } 739 740 void 741 in_pcbdetach(struct inpcb *inp) 742 { 743 struct socket *so = inp->inp_socket; 744 struct inpcbinfo *ipi = inp->inp_pcbinfo; 745 746 #ifdef IPSEC 747 ipsec4_delete_pcbpolicy(inp); 748 #endif /*IPSEC*/ 749 inp->inp_gencnt = ++ipi->ipi_gencnt; 750 KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); 751 in_pcbremlists(inp); 752 so->so_pcb = NULL; 753 sofree(so); /* remove pcb ref */ 754 if (inp->inp_options) 755 m_free(inp->inp_options); 756 if (inp->inp_route.ro_rt) 757 rtfree(inp->inp_route.ro_rt); 758 ip_freemoptions(inp->inp_moptions); 759 inp->inp_vflag = 0; 760 kfree(inp, M_PCB); 761 } 762 763 /* 764 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 765 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 766 * in struct pr_usrreqs, so that protocols can just reference then directly 767 * without the need for a wrapper function. The socket must have a valid 768 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 769 * except through a kernel programming error, so it is acceptable to panic 770 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 771 * because there actually /is/ a programming error somewhere... XXX) 772 */ 773 int 774 in_setsockaddr(struct socket *so, struct sockaddr **nam) 775 { 776 struct inpcb *inp; 777 struct sockaddr_in *sin; 778 779 /* 780 * Do the malloc first in case it blocks. 781 */ 782 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 783 M_WAITOK | M_ZERO); 784 sin->sin_family = AF_INET; 785 sin->sin_len = sizeof *sin; 786 787 crit_enter(); 788 inp = so->so_pcb; 789 if (!inp) { 790 crit_exit(); 791 kfree(sin, M_SONAME); 792 return (ECONNRESET); 793 } 794 sin->sin_port = inp->inp_lport; 795 sin->sin_addr = inp->inp_laddr; 796 crit_exit(); 797 798 *nam = (struct sockaddr *)sin; 799 return (0); 800 } 801 802 void 803 in_setsockaddr_dispatch(netmsg_t msg) 804 { 805 int error; 806 807 error = in_setsockaddr(msg->base.nm_so, msg->peeraddr.nm_nam); 808 lwkt_replymsg(&msg->lmsg, error); 809 } 810 811 int 812 in_setpeeraddr(struct socket *so, struct sockaddr **nam) 813 { 814 struct inpcb *inp; 815 struct sockaddr_in *sin; 816 817 /* 818 * Do the malloc first in case it blocks. 819 */ 820 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 821 M_WAITOK | M_ZERO); 822 sin->sin_family = AF_INET; 823 sin->sin_len = sizeof *sin; 824 825 crit_enter(); 826 inp = so->so_pcb; 827 if (!inp) { 828 crit_exit(); 829 kfree(sin, M_SONAME); 830 return (ECONNRESET); 831 } 832 sin->sin_port = inp->inp_fport; 833 sin->sin_addr = inp->inp_faddr; 834 crit_exit(); 835 836 *nam = (struct sockaddr *)sin; 837 return (0); 838 } 839 840 void 841 in_setpeeraddr_dispatch(netmsg_t msg) 842 { 843 int error; 844 845 error = in_setpeeraddr(msg->base.nm_so, msg->peeraddr.nm_nam); 846 lwkt_replymsg(&msg->lmsg, error); 847 } 848 849 void 850 in_pcbnotifyall(struct inpcbhead *head, struct in_addr faddr, int err, 851 void (*notify)(struct inpcb *, int)) 852 { 853 struct inpcb *inp, *ninp; 854 855 /* 856 * note: if INP_PLACEMARKER is set we must ignore the rest of 857 * the structure and skip it. 858 */ 859 crit_enter(); 860 LIST_FOREACH_MUTABLE(inp, head, inp_list, ninp) { 861 if (inp->inp_flags & INP_PLACEMARKER) 862 continue; 863 #ifdef INET6 864 if (!(inp->inp_vflag & INP_IPV4)) 865 continue; 866 #endif 867 if (inp->inp_faddr.s_addr != faddr.s_addr || 868 inp->inp_socket == NULL) 869 continue; 870 (*notify)(inp, err); /* can remove inp from list! */ 871 } 872 crit_exit(); 873 } 874 875 void 876 in_pcbpurgeif0(struct inpcb *head, struct ifnet *ifp) 877 { 878 struct inpcb *inp; 879 struct ip_moptions *imo; 880 int i, gap; 881 882 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) { 883 if (inp->inp_flags & INP_PLACEMARKER) 884 continue; 885 imo = inp->inp_moptions; 886 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { 887 /* 888 * Unselect the outgoing interface if it is being 889 * detached. 890 */ 891 if (imo->imo_multicast_ifp == ifp) 892 imo->imo_multicast_ifp = NULL; 893 894 /* 895 * Drop multicast group membership if we joined 896 * through the interface being detached. 897 */ 898 for (i = 0, gap = 0; i < imo->imo_num_memberships; 899 i++) { 900 if (imo->imo_membership[i]->inm_ifp == ifp) { 901 in_delmulti(imo->imo_membership[i]); 902 gap++; 903 } else if (gap != 0) 904 imo->imo_membership[i - gap] = 905 imo->imo_membership[i]; 906 } 907 imo->imo_num_memberships -= gap; 908 } 909 } 910 } 911 912 /* 913 * Check for alternatives when higher level complains 914 * about service problems. For now, invalidate cached 915 * routing information. If the route was created dynamically 916 * (by a redirect), time to try a default gateway again. 917 */ 918 void 919 in_losing(struct inpcb *inp) 920 { 921 struct rtentry *rt; 922 struct rt_addrinfo rtinfo; 923 924 if ((rt = inp->inp_route.ro_rt)) { 925 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 926 rtinfo.rti_info[RTAX_DST] = rt_key(rt); 927 rtinfo.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 928 rtinfo.rti_info[RTAX_NETMASK] = rt_mask(rt); 929 rtinfo.rti_flags = rt->rt_flags; 930 rt_missmsg(RTM_LOSING, &rtinfo, rt->rt_flags, 0); 931 if (rt->rt_flags & RTF_DYNAMIC) 932 rtrequest1_global(RTM_DELETE, &rtinfo, NULL, NULL); 933 inp->inp_route.ro_rt = NULL; 934 rtfree(rt); 935 /* 936 * A new route can be allocated 937 * the next time output is attempted. 938 */ 939 } 940 } 941 942 /* 943 * After a routing change, flush old routing 944 * and allocate a (hopefully) better one. 945 */ 946 void 947 in_rtchange(struct inpcb *inp, int err) 948 { 949 if (inp->inp_route.ro_rt) { 950 rtfree(inp->inp_route.ro_rt); 951 inp->inp_route.ro_rt = NULL; 952 /* 953 * A new route can be allocated the next time 954 * output is attempted. 955 */ 956 } 957 } 958 959 /* 960 * Lookup a PCB based on the local address and port. 961 */ 962 struct inpcb * 963 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, 964 u_int lport_arg, int wild_okay, struct ucred *cred) 965 { 966 struct inpcb *inp; 967 int matchwild = 3, wildcard; 968 u_short lport = lport_arg; 969 struct inpcbporthead *porthash; 970 struct inpcbport *phd; 971 struct inpcb *match = NULL; 972 973 /* 974 * If the porthashbase is shared across several cpus we need 975 * to lock. 976 */ 977 if (pcbinfo->porttoken) 978 lwkt_gettoken(pcbinfo->porttoken); 979 980 /* 981 * Best fit PCB lookup. 982 * 983 * First see if this local port is in use by looking on the 984 * port hash list. 985 */ 986 porthash = &pcbinfo->porthashbase[ 987 INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; 988 LIST_FOREACH(phd, porthash, phd_hash) { 989 if (phd->phd_port == lport) 990 break; 991 } 992 if (phd != NULL) { 993 /* 994 * Port is in use by one or more PCBs. Look for best 995 * fit. 996 */ 997 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 998 wildcard = 0; 999 #ifdef INET6 1000 if ((inp->inp_vflag & INP_IPV4) == 0) 1001 continue; 1002 #endif 1003 if (inp->inp_faddr.s_addr != INADDR_ANY) 1004 wildcard++; 1005 if (inp->inp_laddr.s_addr != INADDR_ANY) { 1006 if (laddr.s_addr == INADDR_ANY) 1007 wildcard++; 1008 else if (inp->inp_laddr.s_addr != laddr.s_addr) 1009 continue; 1010 } else { 1011 if (laddr.s_addr != INADDR_ANY) 1012 wildcard++; 1013 } 1014 if (wildcard && !wild_okay) 1015 continue; 1016 if (wildcard < matchwild && 1017 (cred == NULL || 1018 cred->cr_prison == 1019 inp->inp_socket->so_cred->cr_prison)) { 1020 match = inp; 1021 matchwild = wildcard; 1022 if (matchwild == 0) { 1023 break; 1024 } 1025 } 1026 } 1027 } 1028 if (pcbinfo->porttoken) 1029 lwkt_reltoken(pcbinfo->porttoken); 1030 return (match); 1031 } 1032 1033 /* 1034 * Lookup PCB in hash list. 1035 */ 1036 struct inpcb * 1037 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, 1038 u_int fport_arg, struct in_addr laddr, u_int lport_arg, 1039 boolean_t wildcard, struct ifnet *ifp) 1040 { 1041 struct inpcbhead *head; 1042 struct inpcb *inp, *jinp=NULL; 1043 u_short fport = fport_arg, lport = lport_arg; 1044 1045 /* 1046 * First look for an exact match. 1047 */ 1048 head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport, 1049 laddr.s_addr, lport, pcbinfo->hashmask)]; 1050 LIST_FOREACH(inp, head, inp_hash) { 1051 #ifdef INET6 1052 if (!(inp->inp_vflag & INP_IPV4)) 1053 continue; 1054 #endif 1055 if (in_hosteq(inp->inp_faddr, faddr) && 1056 in_hosteq(inp->inp_laddr, laddr) && 1057 inp->inp_fport == fport && inp->inp_lport == lport) { 1058 /* found */ 1059 if (inp->inp_socket == NULL || 1060 inp->inp_socket->so_cred->cr_prison == NULL) { 1061 return (inp); 1062 } else { 1063 if (jinp == NULL) 1064 jinp = inp; 1065 } 1066 } 1067 } 1068 if (jinp != NULL) 1069 return (jinp); 1070 if (wildcard) { 1071 struct inpcb *local_wild = NULL; 1072 struct inpcb *jinp_wild = NULL; 1073 #ifdef INET6 1074 struct inpcb *local_wild_mapped = NULL; 1075 #endif 1076 struct inpcontainer *ic; 1077 struct inpcontainerhead *chead; 1078 struct sockaddr_in jsin; 1079 struct ucred *cred; 1080 1081 /* 1082 * Order of socket selection: 1083 * 1. non-jailed, non-wild. 1084 * 2. non-jailed, wild. 1085 * 3. jailed, non-wild. 1086 * 4. jailed, wild. 1087 */ 1088 jsin.sin_family = AF_INET; 1089 chead = &pcbinfo->wildcardhashbase[ 1090 INP_PCBWILDCARDHASH(lport, pcbinfo->wildcardhashmask)]; 1091 LIST_FOREACH(ic, chead, ic_list) { 1092 inp = ic->ic_inp; 1093 jsin.sin_addr.s_addr = laddr.s_addr; 1094 #ifdef INET6 1095 if (!(inp->inp_vflag & INP_IPV4)) 1096 continue; 1097 #endif 1098 if (inp->inp_socket != NULL) 1099 cred = inp->inp_socket->so_cred; 1100 else 1101 cred = NULL; 1102 if (cred != NULL && jailed(cred)) { 1103 if (jinp != NULL) 1104 continue; 1105 else 1106 if (!jailed_ip(cred->cr_prison, 1107 (struct sockaddr *)&jsin)) 1108 continue; 1109 } 1110 if (inp->inp_lport == lport) { 1111 if (ifp && ifp->if_type == IFT_FAITH && 1112 !(inp->inp_flags & INP_FAITH)) 1113 continue; 1114 if (inp->inp_laddr.s_addr == laddr.s_addr) { 1115 if (cred != NULL && jailed(cred)) 1116 jinp = inp; 1117 else 1118 return (inp); 1119 } 1120 if (inp->inp_laddr.s_addr == INADDR_ANY) { 1121 #ifdef INET6 1122 if (INP_CHECK_SOCKAF(inp->inp_socket, 1123 AF_INET6)) 1124 local_wild_mapped = inp; 1125 else 1126 #endif 1127 if (cred != NULL && 1128 jailed(cred)) 1129 jinp_wild = inp; 1130 else 1131 local_wild = inp; 1132 } 1133 } 1134 } 1135 if (local_wild != NULL) 1136 return (local_wild); 1137 #ifdef INET6 1138 if (local_wild_mapped != NULL) 1139 return (local_wild_mapped); 1140 #endif 1141 if (jinp != NULL) 1142 return (jinp); 1143 return (jinp_wild); 1144 } 1145 1146 /* 1147 * Not found. 1148 */ 1149 return (NULL); 1150 } 1151 1152 /* 1153 * Insert PCB into connection hash table. 1154 */ 1155 void 1156 in_pcbinsconnhash(struct inpcb *inp) 1157 { 1158 struct inpcbinfo *pcbinfo = inp->inp_cpcbinfo; 1159 struct inpcbhead *bucket; 1160 u_int32_t hashkey_faddr, hashkey_laddr; 1161 1162 #ifdef INET6 1163 if (inp->inp_vflag & INP_IPV6) { 1164 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */; 1165 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */; 1166 } else { 1167 #endif 1168 hashkey_faddr = inp->inp_faddr.s_addr; 1169 hashkey_laddr = inp->inp_laddr.s_addr; 1170 #ifdef INET6 1171 } 1172 #endif 1173 1174 KASSERT(!(inp->inp_flags & INP_CONNECTED), ("already on hash list")); 1175 inp->inp_flags |= INP_CONNECTED; 1176 1177 /* 1178 * Insert into the connection hash table. 1179 */ 1180 bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr, 1181 inp->inp_fport, hashkey_laddr, inp->inp_lport, pcbinfo->hashmask)]; 1182 LIST_INSERT_HEAD(bucket, inp, inp_hash); 1183 } 1184 1185 /* 1186 * Remove PCB from connection hash table. 1187 */ 1188 void 1189 in_pcbremconnhash(struct inpcb *inp) 1190 { 1191 KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected")); 1192 LIST_REMOVE(inp, inp_hash); 1193 inp->inp_flags &= ~INP_CONNECTED; 1194 } 1195 1196 /* 1197 * Insert PCB into port hash table. 1198 */ 1199 int 1200 in_pcbinsporthash(struct inpcb *inp) 1201 { 1202 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1203 struct inpcbporthead *pcbporthash; 1204 struct inpcbport *phd; 1205 1206 /* 1207 * If the porthashbase is shared across several cpus we need 1208 * to lock. 1209 */ 1210 if (pcbinfo->porttoken) 1211 lwkt_gettoken(pcbinfo->porttoken); 1212 1213 /* 1214 * Insert into the port hash table. 1215 */ 1216 pcbporthash = &pcbinfo->porthashbase[ 1217 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; 1218 1219 /* Go through port list and look for a head for this lport. */ 1220 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1221 if (phd->phd_port == inp->inp_lport) 1222 break; 1223 } 1224 1225 /* If none exists, malloc one and tack it on. */ 1226 if (phd == NULL) { 1227 KKASSERT(pcbinfo->portsave != NULL); 1228 phd = pcbinfo->portsave; 1229 pcbinfo->portsave = NULL; 1230 phd->phd_port = inp->inp_lport; 1231 LIST_INIT(&phd->phd_pcblist); 1232 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1233 } 1234 1235 inp->inp_phd = phd; 1236 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1237 1238 if (pcbinfo->porttoken) 1239 lwkt_reltoken(pcbinfo->porttoken); 1240 if (pcbinfo->portsave == NULL) { 1241 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), 1242 M_PCB, M_INTWAIT | M_ZERO); 1243 } 1244 return (0); 1245 } 1246 1247 void 1248 in_pcbinswildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1249 { 1250 struct inpcontainer *ic; 1251 struct inpcontainerhead *bucket; 1252 1253 bucket = &pcbinfo->wildcardhashbase[ 1254 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1255 1256 ic = kmalloc(sizeof(struct inpcontainer), M_TEMP, M_INTWAIT); 1257 ic->ic_inp = inp; 1258 LIST_INSERT_HEAD(bucket, ic, ic_list); 1259 } 1260 1261 /* 1262 * Insert PCB into wildcard hash table. 1263 */ 1264 void 1265 in_pcbinswildcardhash(struct inpcb *inp) 1266 { 1267 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1268 1269 KKASSERT(pcbinfo != NULL); 1270 1271 in_pcbinswildcardhash_oncpu(inp, pcbinfo); 1272 inp->inp_flags |= INP_WILDCARD; 1273 } 1274 1275 void 1276 in_pcbremwildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1277 { 1278 struct inpcontainer *ic; 1279 struct inpcontainerhead *head; 1280 1281 /* find bucket */ 1282 head = &pcbinfo->wildcardhashbase[ 1283 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1284 1285 LIST_FOREACH(ic, head, ic_list) { 1286 if (ic->ic_inp == inp) 1287 goto found; 1288 } 1289 return; /* not found! */ 1290 1291 found: 1292 LIST_REMOVE(ic, ic_list); /* remove container from bucket chain */ 1293 kfree(ic, M_TEMP); /* deallocate container */ 1294 } 1295 1296 /* 1297 * Remove PCB from wildcard hash table. 1298 */ 1299 void 1300 in_pcbremwildcardhash(struct inpcb *inp) 1301 { 1302 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1303 1304 KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard")); 1305 in_pcbremwildcardhash_oncpu(inp, pcbinfo); 1306 inp->inp_flags &= ~INP_WILDCARD; 1307 } 1308 1309 /* 1310 * Remove PCB from various lists. 1311 */ 1312 void 1313 in_pcbremlists(struct inpcb *inp) 1314 { 1315 struct inpcbinfo *pcbinfo; 1316 1317 if (inp->inp_lport) { 1318 struct inpcbport *phd; 1319 1320 pcbinfo = inp->inp_pcbinfo; 1321 if (pcbinfo->porttoken) 1322 lwkt_gettoken(pcbinfo->porttoken); 1323 1324 phd = inp->inp_phd; 1325 LIST_REMOVE(inp, inp_portlist); 1326 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1327 LIST_REMOVE(phd, phd_hash); 1328 kfree(phd, M_PCB); 1329 } 1330 if (pcbinfo->porttoken) 1331 lwkt_reltoken(pcbinfo->porttoken); 1332 } 1333 if (inp->inp_flags & INP_WILDCARD) { 1334 in_pcbremwildcardhash(inp); 1335 } else if (inp->inp_flags & INP_CONNECTED) { 1336 in_pcbremconnhash(inp); 1337 } 1338 LIST_REMOVE(inp, inp_list); 1339 inp->inp_pcbinfo->ipi_count--; 1340 } 1341 1342 int 1343 prison_xinpcb(struct thread *td, struct inpcb *inp) 1344 { 1345 struct ucred *cr; 1346 1347 if (td->td_proc == NULL) 1348 return (0); 1349 cr = td->td_proc->p_ucred; 1350 if (cr->cr_prison == NULL) 1351 return (0); 1352 if (inp->inp_socket && inp->inp_socket->so_cred && 1353 inp->inp_socket->so_cred->cr_prison && 1354 cr->cr_prison == inp->inp_socket->so_cred->cr_prison) 1355 return (0); 1356 return (1); 1357 } 1358 1359 int 1360 in_pcblist_global(SYSCTL_HANDLER_ARGS) 1361 { 1362 struct inpcbinfo *pcbinfo = arg1; 1363 struct inpcb *inp, *marker; 1364 struct xinpcb xi; 1365 int error, i, n; 1366 1367 /* 1368 * The process of preparing the TCB list is too time-consuming and 1369 * resource-intensive to repeat twice on every request. 1370 */ 1371 if (req->oldptr == NULL) { 1372 n = pcbinfo->ipi_count; 1373 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1374 return 0; 1375 } 1376 1377 if (req->newptr != NULL) 1378 return EPERM; 1379 1380 /* 1381 * OK, now we're committed to doing something. Re-fetch ipi_count 1382 * after obtaining the generation count. 1383 */ 1384 n = pcbinfo->ipi_count; 1385 1386 marker = kmalloc(sizeof(struct inpcb), M_TEMP, M_WAITOK|M_ZERO); 1387 marker->inp_flags |= INP_PLACEMARKER; 1388 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 1389 1390 i = 0; 1391 error = 0; 1392 1393 while ((inp = LIST_NEXT(marker, inp_list)) != NULL && i < n) { 1394 LIST_REMOVE(marker, inp_list); 1395 LIST_INSERT_AFTER(inp, marker, inp_list); 1396 1397 if (inp->inp_flags & INP_PLACEMARKER) 1398 continue; 1399 if (prison_xinpcb(req->td, inp)) 1400 continue; 1401 bzero(&xi, sizeof xi); 1402 xi.xi_len = sizeof xi; 1403 bcopy(inp, &xi.xi_inp, sizeof *inp); 1404 if (inp->inp_socket) 1405 sotoxsocket(inp->inp_socket, &xi.xi_socket); 1406 if ((error = SYSCTL_OUT(req, &xi, sizeof xi)) != 0) 1407 break; 1408 ++i; 1409 } 1410 LIST_REMOVE(marker, inp_list); 1411 if (error == 0 && i < n) { 1412 bzero(&xi, sizeof xi); 1413 xi.xi_len = sizeof xi; 1414 while (i < n) { 1415 error = SYSCTL_OUT(req, &xi, sizeof xi); 1416 ++i; 1417 } 1418 } 1419 kfree(marker, M_TEMP); 1420 return(error); 1421 } 1422