1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1991, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 67 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $ 68 */ 69 70 #include "opt_ipsec.h" 71 #include "opt_inet6.h" 72 73 #include <sys/param.h> 74 #include <sys/systm.h> 75 #include <sys/malloc.h> 76 #include <sys/mbuf.h> 77 #include <sys/domain.h> 78 #include <sys/protosw.h> 79 #include <sys/socket.h> 80 #include <sys/socketvar.h> 81 #include <sys/proc.h> 82 #include <sys/priv.h> 83 #include <sys/jail.h> 84 #include <sys/kernel.h> 85 #include <sys/sysctl.h> 86 87 #include <sys/thread2.h> 88 #include <sys/socketvar2.h> 89 #include <sys/msgport2.h> 90 91 #include <machine/limits.h> 92 93 #include <net/if.h> 94 #include <net/if_types.h> 95 #include <net/route.h> 96 97 #include <netinet/in.h> 98 #include <netinet/in_pcb.h> 99 #include <netinet/in_var.h> 100 #include <netinet/ip_var.h> 101 #ifdef INET6 102 #include <netinet/ip6.h> 103 #include <netinet6/ip6_var.h> 104 #endif /* INET6 */ 105 106 #ifdef IPSEC 107 #include <netinet6/ipsec.h> 108 #include <netproto/key/key.h> 109 #include <netproto/ipsec/esp_var.h> 110 #endif 111 112 #ifdef FAST_IPSEC 113 #if defined(IPSEC) || defined(IPSEC_ESP) 114 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 115 #endif 116 117 #include <netproto/ipsec/ipsec.h> 118 #include <netproto/ipsec/key.h> 119 #define IPSEC 120 #endif /* FAST_IPSEC */ 121 122 struct in_addr zeroin_addr; 123 124 /* 125 * These configure the range of local port addresses assigned to 126 * "unspecified" outgoing connections/packets/whatever. 127 */ 128 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 129 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 130 131 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 132 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 133 134 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 135 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 136 137 #define RANGECHK(var, min, max) \ 138 if ((var) < (min)) { (var) = (min); } \ 139 else if ((var) > (max)) { (var) = (max); } 140 141 int udpencap_enable = 1; /* enabled by default */ 142 int udpencap_port = 4500; /* triggers decapsulation */ 143 144 static int 145 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 146 { 147 int error; 148 149 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); 150 if (!error) { 151 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 152 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 153 154 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 155 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 156 157 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 158 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 159 } 160 return (error); 161 } 162 163 #undef RANGECHK 164 165 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 166 167 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 168 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 169 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 170 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 171 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 172 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 173 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 174 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 175 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 176 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 177 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 178 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 179 180 /* 181 * in_pcb.c: manage the Protocol Control Blocks. 182 * 183 * NOTE: It is assumed that most of these functions will be called from 184 * a critical section. XXX - There are, unfortunately, a few exceptions 185 * to this rule that should be fixed. 186 * 187 * NOTE: The caller should initialize the cpu field to the cpu running the 188 * protocol stack associated with this inpcbinfo. 189 */ 190 191 void 192 in_pcbinfo_init(struct inpcbinfo *pcbinfo) 193 { 194 LIST_INIT(&pcbinfo->pcblisthead); 195 pcbinfo->cpu = -1; 196 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), M_PCB, 197 M_WAITOK | M_ZERO); 198 } 199 200 struct baddynamicports baddynamicports; 201 202 /* 203 * Check if the specified port is invalid for dynamic allocation. 204 */ 205 int 206 in_baddynamic(u_int16_t port, u_int16_t proto) 207 { 208 switch (proto) { 209 case IPPROTO_TCP: 210 return (DP_ISSET(baddynamicports.tcp, port)); 211 case IPPROTO_UDP: 212 #ifdef IPSEC 213 /* Cannot preset this as it is a sysctl */ 214 if (port == udpencap_port) 215 return (1); 216 #endif 217 return (DP_ISSET(baddynamicports.udp, port)); 218 default: 219 return (0); 220 } 221 } 222 223 224 /* 225 * Allocate a PCB and associate it with the socket. 226 */ 227 int 228 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) 229 { 230 struct inpcb *inp; 231 #ifdef IPSEC 232 int error; 233 #endif 234 235 inp = kmalloc(pcbinfo->ipi_size, M_PCB, M_WAITOK|M_ZERO); 236 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 237 inp->inp_pcbinfo = inp->inp_cpcbinfo = pcbinfo; 238 inp->inp_socket = so; 239 #ifdef IPSEC 240 error = ipsec_init_policy(so, &inp->inp_sp); 241 if (error != 0) { 242 kfree(inp, M_PCB); 243 return (error); 244 } 245 #endif 246 #ifdef INET6 247 if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only) 248 inp->inp_flags |= IN6P_IPV6_V6ONLY; 249 if (ip6_auto_flowlabel) 250 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 251 #endif 252 soreference(so); 253 so->so_pcb = inp; 254 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 255 pcbinfo->ipi_count++; 256 return (0); 257 } 258 259 /* 260 * Unlink a pcb with the intention of moving it to another cpu with a 261 * different pcbinfo. While unlinked nothing should attempt to dereference 262 * inp_pcbinfo, NULL it out so we assert if it does. 263 */ 264 void 265 in_pcbunlink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 266 { 267 KKASSERT(inp->inp_pcbinfo == pcbinfo); 268 269 LIST_REMOVE(inp, inp_list); 270 pcbinfo->ipi_count--; 271 inp->inp_pcbinfo = NULL; 272 } 273 274 /* 275 * Relink a pcb into a new pcbinfo. 276 */ 277 void 278 in_pcblink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 279 { 280 KKASSERT(inp->inp_pcbinfo == NULL); 281 inp->inp_pcbinfo = pcbinfo; 282 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 283 pcbinfo->ipi_count++; 284 } 285 286 int 287 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 288 { 289 struct socket *so = inp->inp_socket; 290 unsigned short *lastport; 291 struct sockaddr_in *sin; 292 struct sockaddr_in jsin; 293 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 294 struct ucred *cred = NULL; 295 u_short lport = 0; 296 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 297 int error; 298 299 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */ 300 return (EADDRNOTAVAIL); 301 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 302 return (EINVAL); /* already bound */ 303 304 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) 305 wild = 1; /* neither SO_REUSEADDR nor SO_REUSEPORT is set */ 306 if (td->td_proc) 307 cred = td->td_proc->p_ucred; 308 309 /* 310 * This has to be atomic. If the porthash is shared across multiple 311 * protocol threads (aka tcp) then the token will be non-NULL. 312 */ 313 if (pcbinfo->porttoken) 314 lwkt_gettoken(pcbinfo->porttoken); 315 316 if (nam != NULL) { 317 sin = (struct sockaddr_in *)nam; 318 if (nam->sa_len != sizeof *sin) { 319 error = EINVAL; 320 goto done; 321 } 322 #ifdef notdef 323 /* 324 * We should check the family, but old programs 325 * incorrectly fail to initialize it. 326 */ 327 if (sin->sin_family != AF_INET) { 328 error = EAFNOSUPPORT; 329 goto done; 330 } 331 #endif 332 if (!prison_replace_wildcards(td, nam)) { 333 error = EINVAL; 334 goto done; 335 } 336 lport = sin->sin_port; 337 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 338 /* 339 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 340 * allow complete duplication of binding if 341 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 342 * and a multicast address is bound on both 343 * new and duplicated sockets. 344 */ 345 if (so->so_options & SO_REUSEADDR) 346 reuseport = SO_REUSEADDR | SO_REUSEPORT; 347 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 348 sin->sin_port = 0; /* yech... */ 349 bzero(&sin->sin_zero, sizeof sin->sin_zero); 350 if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL) { 351 error = EADDRNOTAVAIL; 352 goto done; 353 } 354 } 355 if (lport != 0) { 356 struct inpcb *t; 357 358 /* GROSS */ 359 if (ntohs(lport) < IPPORT_RESERVED && 360 cred && 361 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) { 362 error = EACCES; 363 goto done; 364 } 365 if (so->so_cred->cr_uid != 0 && 366 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 367 t = in_pcblookup_local(pcbinfo, 368 sin->sin_addr, 369 lport, 370 INPLOOKUP_WILDCARD, 371 cred); 372 if (t && 373 (!in_nullhost(sin->sin_addr) || 374 !in_nullhost(t->inp_laddr) || 375 (t->inp_socket->so_options & 376 SO_REUSEPORT) == 0) && 377 (so->so_cred->cr_uid != 378 t->inp_socket->so_cred->cr_uid)) { 379 #ifdef INET6 380 if (!in_nullhost(sin->sin_addr) || 381 !in_nullhost(t->inp_laddr) || 382 INP_SOCKAF(so) == 383 INP_SOCKAF(t->inp_socket)) 384 #endif 385 { 386 error = EADDRINUSE; 387 goto done; 388 } 389 } 390 } 391 if (cred && !prison_replace_wildcards(td, nam)) { 392 error = EADDRNOTAVAIL; 393 goto done; 394 } 395 t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, 396 wild, cred); 397 if (t && !(reuseport & t->inp_socket->so_options)) { 398 #ifdef INET6 399 if (!in_nullhost(sin->sin_addr) || 400 !in_nullhost(t->inp_laddr) || 401 INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) 402 #endif 403 { 404 error = EADDRINUSE; 405 goto done; 406 } 407 } 408 } 409 inp->inp_laddr = sin->sin_addr; 410 } 411 if (lport == 0) { 412 ushort first, last; 413 int count; 414 415 jsin.sin_family = AF_INET; 416 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 417 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 418 inp->inp_laddr.s_addr = INADDR_ANY; 419 error = EINVAL; 420 goto done; 421 } 422 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 423 424 inp->inp_flags |= INP_ANONPORT; 425 426 if (inp->inp_flags & INP_HIGHPORT) { 427 first = ipport_hifirstauto; /* sysctl */ 428 last = ipport_hilastauto; 429 lastport = &pcbinfo->lasthi; 430 } else if (inp->inp_flags & INP_LOWPORT) { 431 if (cred && 432 (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 433 inp->inp_laddr.s_addr = INADDR_ANY; 434 goto done; 435 } 436 first = ipport_lowfirstauto; /* 1023 */ 437 last = ipport_lowlastauto; /* 600 */ 438 lastport = &pcbinfo->lastlow; 439 } else { 440 first = ipport_firstauto; /* sysctl */ 441 last = ipport_lastauto; 442 lastport = &pcbinfo->lastport; 443 } 444 /* 445 * Simple check to ensure all ports are not used up causing 446 * a deadlock here. 447 * 448 * We split the two cases (up and down) so that the direction 449 * is not being tested on each round of the loop. 450 */ 451 if (first > last) { 452 /* 453 * counting down 454 */ 455 count = first - last; 456 457 do { 458 if (count-- < 0) { /* completely used? */ 459 inp->inp_laddr.s_addr = INADDR_ANY; 460 error = EADDRNOTAVAIL; 461 goto done; 462 } 463 --*lastport; 464 if (*lastport > first || *lastport < last) 465 *lastport = first; 466 lport = htons(*lastport); 467 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 468 lport, wild, cred)); 469 } else { 470 /* 471 * counting up 472 */ 473 count = last - first; 474 475 do { 476 if (count-- < 0) { /* completely used? */ 477 inp->inp_laddr.s_addr = INADDR_ANY; 478 error = EADDRNOTAVAIL; 479 goto done; 480 } 481 ++*lastport; 482 if (*lastport < first || *lastport > last) 483 *lastport = first; 484 lport = htons(*lastport); 485 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 486 lport, wild, cred)); 487 } 488 } 489 inp->inp_lport = lport; 490 491 jsin.sin_family = AF_INET; 492 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 493 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) { 494 inp->inp_laddr.s_addr = INADDR_ANY; 495 inp->inp_lport = 0; 496 error = EINVAL; 497 goto done; 498 } 499 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 500 501 if (in_pcbinsporthash(inp) != 0) { 502 inp->inp_laddr.s_addr = INADDR_ANY; 503 inp->inp_lport = 0; 504 error = EAGAIN; 505 goto done; 506 } 507 error = 0; 508 done: 509 if (pcbinfo->porttoken) 510 lwkt_reltoken(pcbinfo->porttoken); 511 return error; 512 } 513 514 static struct inpcb * 515 in_pcblookup_addrport(struct inpcbinfo *pcbinfo, struct in_addr laddr, 516 u_short lport, struct in_addr faddr, u_short fport, struct ucred *cred) 517 { 518 struct inpcb *inp; 519 struct inpcbporthead *porthash; 520 struct inpcbport *phd; 521 struct inpcb *match = NULL; 522 523 /* 524 * If the porthashbase is shared across several cpus we need 525 * to lock. 526 */ 527 if (pcbinfo->porttoken) 528 lwkt_gettoken(pcbinfo->porttoken); 529 530 /* 531 * Best fit PCB lookup. 532 * 533 * First see if this local port is in use by looking on the 534 * port hash list. 535 */ 536 porthash = &pcbinfo->porthashbase[ 537 INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; 538 LIST_FOREACH(phd, porthash, phd_hash) { 539 if (phd->phd_port == lport) 540 break; 541 } 542 if (phd != NULL) { 543 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 544 #ifdef INET6 545 if ((inp->inp_vflag & INP_IPV4) == 0) 546 continue; 547 #endif 548 if (inp->inp_laddr.s_addr != INADDR_ANY && 549 inp->inp_laddr.s_addr != laddr.s_addr) 550 continue; 551 552 if (inp->inp_faddr.s_addr != INADDR_ANY && 553 inp->inp_faddr.s_addr != faddr.s_addr) 554 continue; 555 556 if (inp->inp_fport != 0 && inp->inp_fport != fport) 557 continue; 558 559 if (cred == NULL || 560 cred->cr_prison == 561 inp->inp_socket->so_cred->cr_prison) { 562 match = inp; 563 break; 564 } 565 } 566 } 567 if (pcbinfo->porttoken) 568 lwkt_reltoken(pcbinfo->porttoken); 569 return (match); 570 } 571 572 int 573 in_pcbconn_bind(struct inpcb *inp, const struct sockaddr *nam, 574 struct thread *td) 575 { 576 struct proc *p = td->td_proc; 577 unsigned short *lastport; 578 const struct sockaddr_in *sin = (const struct sockaddr_in *)nam; 579 struct sockaddr_in jsin; 580 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 581 struct ucred *cred = NULL; 582 u_short lport = 0; 583 ushort first, last; 584 int count, error, dup = 0; 585 586 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */ 587 return (EADDRNOTAVAIL); 588 589 KKASSERT(inp->inp_laddr.s_addr != INADDR_ANY); 590 if (inp->inp_lport != 0) 591 return (EINVAL); /* already bound */ 592 593 KKASSERT(p); 594 cred = p->p_ucred; 595 596 /* 597 * This has to be atomic. If the porthash is shared across multiple 598 * protocol threads (aka tcp) then the token will be non-NULL. 599 */ 600 if (pcbinfo->porttoken) 601 lwkt_gettoken(pcbinfo->porttoken); 602 603 jsin.sin_family = AF_INET; 604 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 605 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 606 inp->inp_laddr.s_addr = INADDR_ANY; 607 error = EINVAL; 608 goto done; 609 } 610 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 611 612 inp->inp_flags |= INP_ANONPORT; 613 614 if (inp->inp_flags & INP_HIGHPORT) { 615 first = ipport_hifirstauto; /* sysctl */ 616 last = ipport_hilastauto; 617 lastport = &pcbinfo->lasthi; 618 } else if (inp->inp_flags & INP_LOWPORT) { 619 if (cred && 620 (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 621 inp->inp_laddr.s_addr = INADDR_ANY; 622 goto done; 623 } 624 first = ipport_lowfirstauto; /* 1023 */ 625 last = ipport_lowlastauto; /* 600 */ 626 lastport = &pcbinfo->lastlow; 627 } else { 628 first = ipport_firstauto; /* sysctl */ 629 last = ipport_lastauto; 630 lastport = &pcbinfo->lastport; 631 } 632 633 again: 634 /* 635 * Simple check to ensure all ports are not used up causing 636 * a deadlock here. 637 * 638 * We split the two cases (up and down) so that the direction 639 * is not being tested on each round of the loop. 640 */ 641 if (first > last) { 642 /* 643 * counting down 644 */ 645 count = first - last; 646 647 do { 648 if (count-- < 0) { /* completely used? */ 649 inp->inp_laddr.s_addr = INADDR_ANY; 650 error = EADDRNOTAVAIL; 651 goto done; 652 } 653 --*lastport; 654 if (*lastport > first || *lastport < last) 655 *lastport = first; 656 lport = htons(*lastport); 657 } while (in_pcblookup_addrport(pcbinfo, inp->inp_laddr, lport, 658 sin->sin_addr, sin->sin_port, cred)); 659 } else { 660 /* 661 * counting up 662 */ 663 count = last - first; 664 665 do { 666 if (count-- < 0) { /* completely used? */ 667 inp->inp_laddr.s_addr = INADDR_ANY; 668 error = EADDRNOTAVAIL; 669 goto done; 670 } 671 ++*lastport; 672 if (*lastport < first || *lastport > last) 673 *lastport = first; 674 lport = htons(*lastport); 675 } while (in_pcblookup_addrport(pcbinfo, inp->inp_laddr, lport, 676 sin->sin_addr, sin->sin_port, cred)); 677 } 678 679 /* This could happen on loopback interface */ 680 if (sin->sin_port == lport && 681 sin->sin_addr.s_addr == inp->inp_laddr.s_addr) { 682 if (dup) { 683 /* 684 * Duplicate again; give up 685 */ 686 inp->inp_laddr.s_addr = INADDR_ANY; 687 error = EADDRNOTAVAIL; 688 goto done; 689 } 690 dup = 1; 691 goto again; 692 } 693 inp->inp_lport = lport; 694 695 jsin.sin_family = AF_INET; 696 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 697 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) { 698 inp->inp_laddr.s_addr = INADDR_ANY; 699 inp->inp_lport = 0; 700 error = EINVAL; 701 goto done; 702 } 703 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 704 705 if (in_pcbinsporthash(inp) != 0) { 706 inp->inp_laddr.s_addr = INADDR_ANY; 707 inp->inp_lport = 0; 708 error = EAGAIN; 709 goto done; 710 } 711 error = 0; 712 done: 713 if (pcbinfo->porttoken) 714 lwkt_reltoken(pcbinfo->porttoken); 715 return error; 716 } 717 718 /* 719 * Transform old in_pcbconnect() into an inner subroutine for new 720 * in_pcbconnect(): Do some validity-checking on the remote 721 * address (in mbuf 'nam') and then determine local host address 722 * (i.e., which interface) to use to access that remote host. 723 * 724 * This preserves definition of in_pcbconnect(), while supporting a 725 * slightly different version for T/TCP. (This is more than 726 * a bit of a kludge, but cleaning up the internal interfaces would 727 * have forced minor changes in every protocol). 728 */ 729 int 730 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, 731 struct sockaddr_in **plocal_sin, struct thread *td) 732 { 733 struct in_ifaddr *ia; 734 struct ucred *cred = NULL; 735 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 736 struct sockaddr *jsin; 737 int jailed = 0, alloc_route = 0; 738 739 if (nam->sa_len != sizeof *sin) 740 return (EINVAL); 741 if (sin->sin_family != AF_INET) 742 return (EAFNOSUPPORT); 743 if (sin->sin_port == 0) 744 return (EADDRNOTAVAIL); 745 if (td && td->td_proc && td->td_proc->p_ucred) 746 cred = td->td_proc->p_ucred; 747 if (cred && cred->cr_prison) 748 jailed = 1; 749 if (!TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) { 750 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 751 /* 752 * If the destination address is INADDR_ANY, 753 * use the primary local address. 754 * If the supplied address is INADDR_BROADCAST, 755 * and the primary interface supports broadcast, 756 * choose the broadcast address for that interface. 757 */ 758 if (sin->sin_addr.s_addr == INADDR_ANY) 759 sin->sin_addr = IA_SIN(ia)->sin_addr; 760 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && 761 (ia->ia_ifp->if_flags & IFF_BROADCAST)) 762 sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr; 763 } 764 if (inp->inp_laddr.s_addr == INADDR_ANY) { 765 struct route *ro; 766 767 ia = NULL; 768 /* 769 * If route is known or can be allocated now, 770 * our src addr is taken from the i/f, else punt. 771 * Note that we should check the address family of the cached 772 * destination, in case of sharing the cache with IPv6. 773 */ 774 ro = &inp->inp_route; 775 if (ro->ro_rt && 776 (!(ro->ro_rt->rt_flags & RTF_UP) || 777 ro->ro_dst.sa_family != AF_INET || 778 satosin(&ro->ro_dst)->sin_addr.s_addr != 779 sin->sin_addr.s_addr || 780 inp->inp_socket->so_options & SO_DONTROUTE)) { 781 RTFREE(ro->ro_rt); 782 ro->ro_rt = NULL; 783 } 784 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/ 785 (ro->ro_rt == NULL || 786 ro->ro_rt->rt_ifp == NULL)) { 787 /* No route yet, so try to acquire one */ 788 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 789 ro->ro_dst.sa_family = AF_INET; 790 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 791 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 792 sin->sin_addr; 793 rtalloc(ro); 794 alloc_route = 1; 795 } 796 /* 797 * If we found a route, use the address 798 * corresponding to the outgoing interface 799 * unless it is the loopback (in case a route 800 * to our address on another net goes to loopback). 801 */ 802 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { 803 if (jailed) { 804 if (jailed_ip(cred->cr_prison, 805 ro->ro_rt->rt_ifa->ifa_addr)) { 806 ia = ifatoia(ro->ro_rt->rt_ifa); 807 } 808 } else { 809 ia = ifatoia(ro->ro_rt->rt_ifa); 810 } 811 } 812 if (ia == NULL) { 813 u_short fport = sin->sin_port; 814 815 sin->sin_port = 0; 816 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 817 if (ia && jailed && !jailed_ip(cred->cr_prison, 818 sintosa(&ia->ia_addr))) 819 ia = NULL; 820 if (ia == NULL) 821 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 822 if (ia && jailed && !jailed_ip(cred->cr_prison, 823 sintosa(&ia->ia_addr))) 824 ia = NULL; 825 sin->sin_port = fport; 826 if (ia == NULL && 827 !TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) 828 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 829 if (ia && jailed && !jailed_ip(cred->cr_prison, 830 sintosa(&ia->ia_addr))) 831 ia = NULL; 832 833 if (!jailed && ia == NULL) 834 goto fail; 835 } 836 /* 837 * If the destination address is multicast and an outgoing 838 * interface has been set as a multicast option, use the 839 * address of that interface as our source address. 840 */ 841 if (!jailed && IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 842 inp->inp_moptions != NULL) { 843 struct ip_moptions *imo; 844 struct ifnet *ifp; 845 846 imo = inp->inp_moptions; 847 if (imo->imo_multicast_ifp != NULL) { 848 struct in_ifaddr_container *iac; 849 850 ifp = imo->imo_multicast_ifp; 851 ia = NULL; 852 TAILQ_FOREACH(iac, 853 &in_ifaddrheads[mycpuid], ia_link) { 854 if (iac->ia->ia_ifp == ifp) { 855 ia = iac->ia; 856 break; 857 } 858 } 859 if (ia == NULL) 860 goto fail; 861 } 862 } 863 /* 864 * Don't do pcblookup call here; return interface in plocal_sin 865 * and exit to caller, that will do the lookup. 866 */ 867 if (ia == NULL && jailed) { 868 if ((jsin = prison_get_nonlocal(cred->cr_prison, AF_INET, NULL)) != NULL || 869 (jsin = prison_get_local(cred->cr_prison, AF_INET, NULL)) != NULL) { 870 *plocal_sin = satosin(jsin); 871 } else { 872 /* IPv6 only Jail */ 873 goto fail; 874 } 875 } else { 876 *plocal_sin = &ia->ia_addr; 877 } 878 } 879 return (0); 880 fail: 881 if (alloc_route) { 882 struct route *ro = &inp->inp_route; 883 884 if (ro->ro_rt != NULL) 885 RTFREE(ro->ro_rt); 886 bzero(ro, sizeof(*ro)); 887 } 888 return (EADDRNOTAVAIL); 889 } 890 891 /* 892 * Outer subroutine: 893 * Connect from a socket to a specified address. 894 * Both address and port must be specified in argument sin. 895 * If don't have a local address for this socket yet, 896 * then pick one. 897 */ 898 int 899 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 900 { 901 struct sockaddr_in *if_sin; 902 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 903 int error; 904 905 /* Call inner routine to assign local interface address. */ 906 if ((error = in_pcbladdr(inp, nam, &if_sin, td)) != 0) 907 return (error); 908 909 if (in_pcblookup_hash(inp->inp_cpcbinfo, sin->sin_addr, sin->sin_port, 910 inp->inp_laddr.s_addr ? 911 inp->inp_laddr : if_sin->sin_addr, 912 inp->inp_lport, FALSE, NULL) != NULL) { 913 return (EADDRINUSE); 914 } 915 if (inp->inp_laddr.s_addr == INADDR_ANY) { 916 if (inp->inp_lport == 0) { 917 error = in_pcbbind(inp, NULL, td); 918 if (error) 919 return (error); 920 } 921 inp->inp_laddr = if_sin->sin_addr; 922 } 923 inp->inp_faddr = sin->sin_addr; 924 inp->inp_fport = sin->sin_port; 925 in_pcbinsconnhash(inp); 926 return (0); 927 } 928 929 void 930 in_pcbdisconnect(struct inpcb *inp) 931 { 932 933 inp->inp_faddr.s_addr = INADDR_ANY; 934 inp->inp_fport = 0; 935 in_pcbremconnhash(inp); 936 if (inp->inp_socket->so_state & SS_NOFDREF) 937 in_pcbdetach(inp); 938 } 939 940 void 941 in_pcbdetach(struct inpcb *inp) 942 { 943 struct socket *so = inp->inp_socket; 944 struct inpcbinfo *ipi = inp->inp_pcbinfo; 945 946 #ifdef IPSEC 947 ipsec4_delete_pcbpolicy(inp); 948 #endif /*IPSEC*/ 949 inp->inp_gencnt = ++ipi->ipi_gencnt; 950 KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); 951 in_pcbremlists(inp); 952 so->so_pcb = NULL; 953 sofree(so); /* remove pcb ref */ 954 if (inp->inp_options) 955 m_free(inp->inp_options); 956 if (inp->inp_route.ro_rt) 957 rtfree(inp->inp_route.ro_rt); 958 ip_freemoptions(inp->inp_moptions); 959 inp->inp_vflag = 0; 960 kfree(inp, M_PCB); 961 } 962 963 /* 964 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 965 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 966 * in struct pr_usrreqs, so that protocols can just reference then directly 967 * without the need for a wrapper function. The socket must have a valid 968 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 969 * except through a kernel programming error, so it is acceptable to panic 970 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 971 * because there actually /is/ a programming error somewhere... XXX) 972 */ 973 int 974 in_setsockaddr(struct socket *so, struct sockaddr **nam) 975 { 976 struct inpcb *inp; 977 struct sockaddr_in *sin; 978 979 /* 980 * Do the malloc first in case it blocks. 981 */ 982 sin = kmalloc(sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); 983 sin->sin_family = AF_INET; 984 sin->sin_len = sizeof *sin; 985 986 crit_enter(); 987 inp = so->so_pcb; 988 if (!inp) { 989 crit_exit(); 990 kfree(sin, M_SONAME); 991 return (ECONNRESET); 992 } 993 sin->sin_port = inp->inp_lport; 994 sin->sin_addr = inp->inp_laddr; 995 crit_exit(); 996 997 *nam = (struct sockaddr *)sin; 998 return (0); 999 } 1000 1001 void 1002 in_setsockaddr_dispatch(netmsg_t msg) 1003 { 1004 int error; 1005 1006 error = in_setsockaddr(msg->base.nm_so, msg->peeraddr.nm_nam); 1007 lwkt_replymsg(&msg->lmsg, error); 1008 } 1009 1010 int 1011 in_setpeeraddr(struct socket *so, struct sockaddr **nam) 1012 { 1013 struct inpcb *inp; 1014 struct sockaddr_in *sin; 1015 1016 /* 1017 * Do the malloc first in case it blocks. 1018 */ 1019 sin = kmalloc(sizeof *sin, M_SONAME, M_WAITOK | M_ZERO); 1020 sin->sin_family = AF_INET; 1021 sin->sin_len = sizeof *sin; 1022 1023 crit_enter(); 1024 inp = so->so_pcb; 1025 if (!inp) { 1026 crit_exit(); 1027 kfree(sin, M_SONAME); 1028 return (ECONNRESET); 1029 } 1030 sin->sin_port = inp->inp_fport; 1031 sin->sin_addr = inp->inp_faddr; 1032 crit_exit(); 1033 1034 *nam = (struct sockaddr *)sin; 1035 return (0); 1036 } 1037 1038 void 1039 in_setpeeraddr_dispatch(netmsg_t msg) 1040 { 1041 int error; 1042 1043 error = in_setpeeraddr(msg->base.nm_so, msg->peeraddr.nm_nam); 1044 lwkt_replymsg(&msg->lmsg, error); 1045 } 1046 1047 void 1048 in_pcbnotifyall(struct inpcbhead *head, struct in_addr faddr, int err, 1049 void (*notify)(struct inpcb *, int)) 1050 { 1051 struct inpcb *inp, *ninp; 1052 1053 /* 1054 * note: if INP_PLACEMARKER is set we must ignore the rest of 1055 * the structure and skip it. 1056 */ 1057 crit_enter(); 1058 LIST_FOREACH_MUTABLE(inp, head, inp_list, ninp) { 1059 if (inp->inp_flags & INP_PLACEMARKER) 1060 continue; 1061 #ifdef INET6 1062 if (!(inp->inp_vflag & INP_IPV4)) 1063 continue; 1064 #endif 1065 if (inp->inp_faddr.s_addr != faddr.s_addr || 1066 inp->inp_socket == NULL) 1067 continue; 1068 (*notify)(inp, err); /* can remove inp from list! */ 1069 } 1070 crit_exit(); 1071 } 1072 1073 void 1074 in_pcbpurgeif0(struct inpcb *head, struct ifnet *ifp) 1075 { 1076 struct inpcb *inp; 1077 struct ip_moptions *imo; 1078 int i, gap; 1079 1080 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) { 1081 if (inp->inp_flags & INP_PLACEMARKER) 1082 continue; 1083 imo = inp->inp_moptions; 1084 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { 1085 /* 1086 * Unselect the outgoing interface if it is being 1087 * detached. 1088 */ 1089 if (imo->imo_multicast_ifp == ifp) 1090 imo->imo_multicast_ifp = NULL; 1091 1092 /* 1093 * Drop multicast group membership if we joined 1094 * through the interface being detached. 1095 */ 1096 for (i = 0, gap = 0; i < imo->imo_num_memberships; 1097 i++) { 1098 if (imo->imo_membership[i]->inm_ifp == ifp) { 1099 in_delmulti(imo->imo_membership[i]); 1100 gap++; 1101 } else if (gap != 0) 1102 imo->imo_membership[i - gap] = 1103 imo->imo_membership[i]; 1104 } 1105 imo->imo_num_memberships -= gap; 1106 } 1107 } 1108 } 1109 1110 /* 1111 * Check for alternatives when higher level complains 1112 * about service problems. For now, invalidate cached 1113 * routing information. If the route was created dynamically 1114 * (by a redirect), time to try a default gateway again. 1115 */ 1116 void 1117 in_losing(struct inpcb *inp) 1118 { 1119 struct rtentry *rt; 1120 struct rt_addrinfo rtinfo; 1121 1122 if ((rt = inp->inp_route.ro_rt)) { 1123 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 1124 rtinfo.rti_info[RTAX_DST] = rt_key(rt); 1125 rtinfo.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1126 rtinfo.rti_info[RTAX_NETMASK] = rt_mask(rt); 1127 rtinfo.rti_flags = rt->rt_flags; 1128 rt_missmsg(RTM_LOSING, &rtinfo, rt->rt_flags, 0); 1129 if (rt->rt_flags & RTF_DYNAMIC) 1130 rtrequest1_global(RTM_DELETE, &rtinfo, NULL, NULL); 1131 inp->inp_route.ro_rt = NULL; 1132 rtfree(rt); 1133 /* 1134 * A new route can be allocated 1135 * the next time output is attempted. 1136 */ 1137 } 1138 } 1139 1140 /* 1141 * After a routing change, flush old routing 1142 * and allocate a (hopefully) better one. 1143 */ 1144 void 1145 in_rtchange(struct inpcb *inp, int err) 1146 { 1147 if (inp->inp_route.ro_rt) { 1148 rtfree(inp->inp_route.ro_rt); 1149 inp->inp_route.ro_rt = NULL; 1150 /* 1151 * A new route can be allocated the next time 1152 * output is attempted. 1153 */ 1154 } 1155 } 1156 1157 /* 1158 * Lookup a PCB based on the local address and port. 1159 */ 1160 struct inpcb * 1161 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, 1162 u_int lport_arg, int wild_okay, struct ucred *cred) 1163 { 1164 struct inpcb *inp; 1165 int matchwild = 3, wildcard; 1166 u_short lport = lport_arg; 1167 struct inpcbporthead *porthash; 1168 struct inpcbport *phd; 1169 struct inpcb *match = NULL; 1170 1171 /* 1172 * If the porthashbase is shared across several cpus we need 1173 * to lock. 1174 */ 1175 if (pcbinfo->porttoken) 1176 lwkt_gettoken(pcbinfo->porttoken); 1177 1178 /* 1179 * Best fit PCB lookup. 1180 * 1181 * First see if this local port is in use by looking on the 1182 * port hash list. 1183 */ 1184 porthash = &pcbinfo->porthashbase[ 1185 INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; 1186 LIST_FOREACH(phd, porthash, phd_hash) { 1187 if (phd->phd_port == lport) 1188 break; 1189 } 1190 if (phd != NULL) { 1191 /* 1192 * Port is in use by one or more PCBs. Look for best 1193 * fit. 1194 */ 1195 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 1196 wildcard = 0; 1197 #ifdef INET6 1198 if ((inp->inp_vflag & INP_IPV4) == 0) 1199 continue; 1200 #endif 1201 if (inp->inp_faddr.s_addr != INADDR_ANY) 1202 wildcard++; 1203 if (inp->inp_laddr.s_addr != INADDR_ANY) { 1204 if (laddr.s_addr == INADDR_ANY) 1205 wildcard++; 1206 else if (inp->inp_laddr.s_addr != laddr.s_addr) 1207 continue; 1208 } else { 1209 if (laddr.s_addr != INADDR_ANY) 1210 wildcard++; 1211 } 1212 if (wildcard && !wild_okay) 1213 continue; 1214 if (wildcard < matchwild && 1215 (cred == NULL || 1216 cred->cr_prison == 1217 inp->inp_socket->so_cred->cr_prison)) { 1218 match = inp; 1219 matchwild = wildcard; 1220 if (matchwild == 0) { 1221 break; 1222 } 1223 } 1224 } 1225 } 1226 if (pcbinfo->porttoken) 1227 lwkt_reltoken(pcbinfo->porttoken); 1228 return (match); 1229 } 1230 1231 /* 1232 * Lookup PCB in hash list. 1233 */ 1234 struct inpcb * 1235 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, 1236 u_int fport_arg, struct in_addr laddr, u_int lport_arg, 1237 boolean_t wildcard, struct ifnet *ifp) 1238 { 1239 struct inpcbhead *head; 1240 struct inpcb *inp, *jinp=NULL; 1241 u_short fport = fport_arg, lport = lport_arg; 1242 1243 /* 1244 * First look for an exact match. 1245 */ 1246 head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport, 1247 laddr.s_addr, lport, pcbinfo->hashmask)]; 1248 LIST_FOREACH(inp, head, inp_hash) { 1249 #ifdef INET6 1250 if (!(inp->inp_vflag & INP_IPV4)) 1251 continue; 1252 #endif 1253 if (in_hosteq(inp->inp_faddr, faddr) && 1254 in_hosteq(inp->inp_laddr, laddr) && 1255 inp->inp_fport == fport && inp->inp_lport == lport) { 1256 /* found */ 1257 if (inp->inp_socket == NULL || 1258 inp->inp_socket->so_cred->cr_prison == NULL) { 1259 return (inp); 1260 } else { 1261 if (jinp == NULL) 1262 jinp = inp; 1263 } 1264 } 1265 } 1266 if (jinp != NULL) 1267 return (jinp); 1268 if (wildcard) { 1269 struct inpcb *local_wild = NULL; 1270 struct inpcb *jinp_wild = NULL; 1271 #ifdef INET6 1272 struct inpcb *local_wild_mapped = NULL; 1273 #endif 1274 struct inpcontainer *ic; 1275 struct inpcontainerhead *chead; 1276 struct sockaddr_in jsin; 1277 struct ucred *cred; 1278 1279 /* 1280 * Order of socket selection: 1281 * 1. non-jailed, non-wild. 1282 * 2. non-jailed, wild. 1283 * 3. jailed, non-wild. 1284 * 4. jailed, wild. 1285 */ 1286 jsin.sin_family = AF_INET; 1287 chead = &pcbinfo->wildcardhashbase[ 1288 INP_PCBWILDCARDHASH(lport, pcbinfo->wildcardhashmask)]; 1289 LIST_FOREACH(ic, chead, ic_list) { 1290 inp = ic->ic_inp; 1291 jsin.sin_addr.s_addr = laddr.s_addr; 1292 #ifdef INET6 1293 if (!(inp->inp_vflag & INP_IPV4)) 1294 continue; 1295 #endif 1296 if (inp->inp_socket != NULL) 1297 cred = inp->inp_socket->so_cred; 1298 else 1299 cred = NULL; 1300 if (cred != NULL && jailed(cred)) { 1301 if (jinp != NULL) 1302 continue; 1303 else 1304 if (!jailed_ip(cred->cr_prison, 1305 (struct sockaddr *)&jsin)) 1306 continue; 1307 } 1308 if (inp->inp_lport == lport) { 1309 if (ifp && ifp->if_type == IFT_FAITH && 1310 !(inp->inp_flags & INP_FAITH)) 1311 continue; 1312 if (inp->inp_laddr.s_addr == laddr.s_addr) { 1313 if (cred != NULL && jailed(cred)) 1314 jinp = inp; 1315 else 1316 return (inp); 1317 } 1318 if (inp->inp_laddr.s_addr == INADDR_ANY) { 1319 #ifdef INET6 1320 if (INP_CHECK_SOCKAF(inp->inp_socket, 1321 AF_INET6)) 1322 local_wild_mapped = inp; 1323 else 1324 #endif 1325 if (cred != NULL && 1326 jailed(cred)) 1327 jinp_wild = inp; 1328 else 1329 local_wild = inp; 1330 } 1331 } 1332 } 1333 if (local_wild != NULL) 1334 return (local_wild); 1335 #ifdef INET6 1336 if (local_wild_mapped != NULL) 1337 return (local_wild_mapped); 1338 #endif 1339 if (jinp != NULL) 1340 return (jinp); 1341 return (jinp_wild); 1342 } 1343 1344 /* 1345 * Not found. 1346 */ 1347 return (NULL); 1348 } 1349 1350 /* 1351 * Insert PCB into connection hash table. 1352 */ 1353 void 1354 in_pcbinsconnhash(struct inpcb *inp) 1355 { 1356 struct inpcbinfo *pcbinfo = inp->inp_cpcbinfo; 1357 struct inpcbhead *bucket; 1358 u_int32_t hashkey_faddr, hashkey_laddr; 1359 1360 #ifdef INET6 1361 if (inp->inp_vflag & INP_IPV6) { 1362 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */; 1363 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */; 1364 } else { 1365 #endif 1366 hashkey_faddr = inp->inp_faddr.s_addr; 1367 hashkey_laddr = inp->inp_laddr.s_addr; 1368 #ifdef INET6 1369 } 1370 #endif 1371 1372 KASSERT(!(inp->inp_flags & INP_WILDCARD), 1373 ("already on wildcardhash")); 1374 KASSERT(!(inp->inp_flags & INP_CONNECTED), 1375 ("already on connhash")); 1376 inp->inp_flags |= INP_CONNECTED; 1377 1378 /* 1379 * Insert into the connection hash table. 1380 */ 1381 bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr, 1382 inp->inp_fport, hashkey_laddr, inp->inp_lport, pcbinfo->hashmask)]; 1383 LIST_INSERT_HEAD(bucket, inp, inp_hash); 1384 } 1385 1386 /* 1387 * Remove PCB from connection hash table. 1388 */ 1389 void 1390 in_pcbremconnhash(struct inpcb *inp) 1391 { 1392 KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected")); 1393 LIST_REMOVE(inp, inp_hash); 1394 inp->inp_flags &= ~INP_CONNECTED; 1395 } 1396 1397 /* 1398 * Insert PCB into port hash table. 1399 */ 1400 int 1401 in_pcbinsporthash(struct inpcb *inp) 1402 { 1403 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1404 struct inpcbporthead *pcbporthash; 1405 struct inpcbport *phd; 1406 1407 /* 1408 * If the porthashbase is shared across several cpus we need 1409 * to lock. 1410 */ 1411 if (pcbinfo->porttoken) 1412 lwkt_gettoken(pcbinfo->porttoken); 1413 1414 /* 1415 * Insert into the port hash table. 1416 */ 1417 pcbporthash = &pcbinfo->porthashbase[ 1418 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; 1419 1420 /* Go through port list and look for a head for this lport. */ 1421 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1422 if (phd->phd_port == inp->inp_lport) 1423 break; 1424 } 1425 1426 /* If none exists, malloc one and tack it on. */ 1427 if (phd == NULL) { 1428 KKASSERT(pcbinfo->portsave != NULL); 1429 phd = pcbinfo->portsave; 1430 pcbinfo->portsave = NULL; 1431 phd->phd_port = inp->inp_lport; 1432 LIST_INIT(&phd->phd_pcblist); 1433 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1434 } 1435 1436 inp->inp_phd = phd; 1437 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1438 1439 if (pcbinfo->porttoken) 1440 lwkt_reltoken(pcbinfo->porttoken); 1441 if (pcbinfo->portsave == NULL) { 1442 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), 1443 M_PCB, M_INTWAIT | M_ZERO); 1444 } 1445 return (0); 1446 } 1447 1448 void 1449 in_pcbinswildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1450 { 1451 struct inpcontainer *ic; 1452 struct inpcontainerhead *bucket; 1453 1454 bucket = &pcbinfo->wildcardhashbase[ 1455 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1456 1457 ic = kmalloc(sizeof(struct inpcontainer), M_TEMP, M_INTWAIT); 1458 ic->ic_inp = inp; 1459 LIST_INSERT_HEAD(bucket, ic, ic_list); 1460 } 1461 1462 /* 1463 * Insert PCB into wildcard hash table. 1464 */ 1465 void 1466 in_pcbinswildcardhash(struct inpcb *inp) 1467 { 1468 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1469 1470 KASSERT(!(inp->inp_flags & INP_CONNECTED), 1471 ("already on connhash")); 1472 KASSERT(!(inp->inp_flags & INP_WILDCARD), 1473 ("already on wildcardhash")); 1474 inp->inp_flags |= INP_WILDCARD; 1475 1476 in_pcbinswildcardhash_oncpu(inp, pcbinfo); 1477 } 1478 1479 void 1480 in_pcbremwildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1481 { 1482 struct inpcontainer *ic; 1483 struct inpcontainerhead *head; 1484 1485 /* find bucket */ 1486 head = &pcbinfo->wildcardhashbase[ 1487 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1488 1489 LIST_FOREACH(ic, head, ic_list) { 1490 if (ic->ic_inp == inp) 1491 goto found; 1492 } 1493 return; /* not found! */ 1494 1495 found: 1496 LIST_REMOVE(ic, ic_list); /* remove container from bucket chain */ 1497 kfree(ic, M_TEMP); /* deallocate container */ 1498 } 1499 1500 /* 1501 * Remove PCB from wildcard hash table. 1502 */ 1503 void 1504 in_pcbremwildcardhash(struct inpcb *inp) 1505 { 1506 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1507 1508 KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard")); 1509 in_pcbremwildcardhash_oncpu(inp, pcbinfo); 1510 inp->inp_flags &= ~INP_WILDCARD; 1511 } 1512 1513 /* 1514 * Remove PCB from various lists. 1515 */ 1516 void 1517 in_pcbremlists(struct inpcb *inp) 1518 { 1519 struct inpcbinfo *pcbinfo; 1520 1521 if (inp->inp_lport) { 1522 struct inpcbport *phd; 1523 1524 pcbinfo = inp->inp_pcbinfo; 1525 if (pcbinfo->porttoken) 1526 lwkt_gettoken(pcbinfo->porttoken); 1527 1528 phd = inp->inp_phd; 1529 LIST_REMOVE(inp, inp_portlist); 1530 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1531 LIST_REMOVE(phd, phd_hash); 1532 kfree(phd, M_PCB); 1533 } 1534 if (pcbinfo->porttoken) 1535 lwkt_reltoken(pcbinfo->porttoken); 1536 } 1537 if (inp->inp_flags & INP_WILDCARD) { 1538 in_pcbremwildcardhash(inp); 1539 } else if (inp->inp_flags & INP_CONNECTED) { 1540 in_pcbremconnhash(inp); 1541 } 1542 LIST_REMOVE(inp, inp_list); 1543 inp->inp_pcbinfo->ipi_count--; 1544 } 1545 1546 int 1547 prison_xinpcb(struct thread *td, struct inpcb *inp) 1548 { 1549 struct ucred *cr; 1550 1551 if (td->td_proc == NULL) 1552 return (0); 1553 cr = td->td_proc->p_ucred; 1554 if (cr->cr_prison == NULL) 1555 return (0); 1556 if (inp->inp_socket && inp->inp_socket->so_cred && 1557 inp->inp_socket->so_cred->cr_prison && 1558 cr->cr_prison == inp->inp_socket->so_cred->cr_prison) 1559 return (0); 1560 return (1); 1561 } 1562 1563 int 1564 in_pcblist_global(SYSCTL_HANDLER_ARGS) 1565 { 1566 struct inpcbinfo *pcbinfo = arg1; 1567 struct inpcb *inp, *marker; 1568 struct xinpcb xi; 1569 int error, i, n; 1570 1571 /* 1572 * The process of preparing the TCB list is too time-consuming and 1573 * resource-intensive to repeat twice on every request. 1574 */ 1575 if (req->oldptr == NULL) { 1576 n = pcbinfo->ipi_count; 1577 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1578 return 0; 1579 } 1580 1581 if (req->newptr != NULL) 1582 return EPERM; 1583 1584 /* 1585 * OK, now we're committed to doing something. Re-fetch ipi_count 1586 * after obtaining the generation count. 1587 */ 1588 n = pcbinfo->ipi_count; 1589 1590 marker = kmalloc(sizeof(struct inpcb), M_TEMP, M_WAITOK|M_ZERO); 1591 marker->inp_flags |= INP_PLACEMARKER; 1592 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 1593 1594 i = 0; 1595 error = 0; 1596 1597 while ((inp = LIST_NEXT(marker, inp_list)) != NULL && i < n) { 1598 LIST_REMOVE(marker, inp_list); 1599 LIST_INSERT_AFTER(inp, marker, inp_list); 1600 1601 if (inp->inp_flags & INP_PLACEMARKER) 1602 continue; 1603 if (prison_xinpcb(req->td, inp)) 1604 continue; 1605 bzero(&xi, sizeof xi); 1606 xi.xi_len = sizeof xi; 1607 bcopy(inp, &xi.xi_inp, sizeof *inp); 1608 if (inp->inp_socket) 1609 sotoxsocket(inp->inp_socket, &xi.xi_socket); 1610 if ((error = SYSCTL_OUT(req, &xi, sizeof xi)) != 0) 1611 break; 1612 ++i; 1613 } 1614 LIST_REMOVE(marker, inp_list); 1615 if (error == 0 && i < n) { 1616 bzero(&xi, sizeof xi); 1617 xi.xi_len = sizeof xi; 1618 while (i < n) { 1619 error = SYSCTL_OUT(req, &xi, sizeof xi); 1620 ++i; 1621 } 1622 } 1623 kfree(marker, M_TEMP); 1624 return(error); 1625 } 1626 1627 int 1628 in_pcblist_global_nomarker(SYSCTL_HANDLER_ARGS, struct xinpcb **xi0, int *nxi0) 1629 { 1630 struct inpcbinfo *pcbinfo = arg1; 1631 struct inpcb *inp; 1632 struct xinpcb *xi; 1633 int nxi; 1634 1635 *nxi0 = 0; 1636 *xi0 = NULL; 1637 1638 /* 1639 * The process of preparing the PCB list is too time-consuming and 1640 * resource-intensive to repeat twice on every request. 1641 */ 1642 if (req->oldptr == NULL) { 1643 int n = pcbinfo->ipi_count; 1644 1645 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1646 return 0; 1647 } 1648 1649 if (req->newptr != NULL) 1650 return EPERM; 1651 1652 if (pcbinfo->ipi_count == 0) 1653 return 0; 1654 1655 nxi = 0; 1656 xi = kmalloc(pcbinfo->ipi_count * sizeof(*xi), M_TEMP, 1657 M_WAITOK | M_ZERO | M_NULLOK); 1658 if (xi == NULL) 1659 return ENOMEM; 1660 1661 LIST_FOREACH(inp, &pcbinfo->pcblisthead, inp_list) { 1662 struct xinpcb *xi_ptr = &xi[nxi]; 1663 1664 if (prison_xinpcb(req->td, inp)) 1665 continue; 1666 1667 xi_ptr->xi_len = sizeof(*xi_ptr); 1668 bcopy(inp, &xi_ptr->xi_inp, sizeof(*inp)); 1669 if (inp->inp_socket) 1670 sotoxsocket(inp->inp_socket, &xi_ptr->xi_socket); 1671 ++nxi; 1672 } 1673 1674 if (nxi == 0) { 1675 kfree(xi, M_TEMP); 1676 return 0; 1677 } 1678 1679 *nxi0 = nxi; 1680 *xi0 = xi; 1681 1682 return 0; 1683 } 1684 1685 void 1686 in_savefaddr(struct socket *so, const struct sockaddr *faddr) 1687 { 1688 struct sockaddr_in *sin; 1689 1690 KASSERT(faddr->sa_family == AF_INET, 1691 ("not AF_INET faddr %d", faddr->sa_family)); 1692 1693 sin = kmalloc(sizeof(*sin), M_SONAME, M_WAITOK | M_ZERO); 1694 sin->sin_family = AF_INET; 1695 sin->sin_len = sizeof(*sin); 1696 sin->sin_port = ((const struct sockaddr_in *)faddr)->sin_port; 1697 sin->sin_addr = ((const struct sockaddr_in *)faddr)->sin_addr; 1698 1699 so->so_faddr = (struct sockaddr *)sin; 1700 } 1701