1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1991, 1993, 1995 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 67 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $ 68 * $DragonFly: src/sys/netinet/in_pcb.c,v 1.48 2008/11/08 03:38:23 sephe Exp $ 69 */ 70 71 #include "opt_ipsec.h" 72 #include "opt_inet6.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/malloc.h> 77 #include <sys/mbuf.h> 78 #include <sys/domain.h> 79 #include <sys/protosw.h> 80 #include <sys/socket.h> 81 #include <sys/socketvar.h> 82 #include <sys/proc.h> 83 #include <sys/priv.h> 84 #include <sys/jail.h> 85 #include <sys/kernel.h> 86 #include <sys/sysctl.h> 87 88 #include <sys/thread2.h> 89 #include <sys/socketvar2.h> 90 #include <sys/msgport2.h> 91 92 #include <machine/limits.h> 93 94 #include <net/if.h> 95 #include <net/if_types.h> 96 #include <net/route.h> 97 98 #include <netinet/in.h> 99 #include <netinet/in_pcb.h> 100 #include <netinet/in_var.h> 101 #include <netinet/ip_var.h> 102 #ifdef INET6 103 #include <netinet/ip6.h> 104 #include <netinet6/ip6_var.h> 105 #endif /* INET6 */ 106 107 #ifdef IPSEC 108 #include <netinet6/ipsec.h> 109 #include <netproto/key/key.h> 110 #include <netproto/ipsec/esp_var.h> 111 #endif 112 113 #ifdef FAST_IPSEC 114 #if defined(IPSEC) || defined(IPSEC_ESP) 115 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 116 #endif 117 118 #include <netproto/ipsec/ipsec.h> 119 #include <netproto/ipsec/key.h> 120 #define IPSEC 121 #endif /* FAST_IPSEC */ 122 123 struct in_addr zeroin_addr; 124 125 /* 126 * These configure the range of local port addresses assigned to 127 * "unspecified" outgoing connections/packets/whatever. 128 */ 129 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 130 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 131 132 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 133 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 134 135 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 136 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 137 138 #define RANGECHK(var, min, max) \ 139 if ((var) < (min)) { (var) = (min); } \ 140 else if ((var) > (max)) { (var) = (max); } 141 142 int udpencap_enable = 1; /* enabled by default */ 143 int udpencap_port = 4500; /* triggers decapsulation */ 144 145 static int 146 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 147 { 148 int error; 149 150 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); 151 if (!error) { 152 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 153 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 154 155 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 156 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 157 158 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 159 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 160 } 161 return (error); 162 } 163 164 #undef RANGECHK 165 166 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 167 168 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 169 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 170 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 171 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 172 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 173 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 174 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 175 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 176 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 177 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 178 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 179 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 180 181 /* 182 * in_pcb.c: manage the Protocol Control Blocks. 183 * 184 * NOTE: It is assumed that most of these functions will be called from 185 * a critical section. XXX - There are, unfortunately, a few exceptions 186 * to this rule that should be fixed. 187 * 188 * NOTE: The caller should initialize the cpu field to the cpu running the 189 * protocol stack associated with this inpcbinfo. 190 */ 191 192 void 193 in_pcbinfo_init(struct inpcbinfo *pcbinfo) 194 { 195 LIST_INIT(&pcbinfo->pcblisthead); 196 pcbinfo->cpu = -1; 197 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), M_PCB, 198 M_WAITOK | M_ZERO); 199 } 200 201 struct baddynamicports baddynamicports; 202 203 /* 204 * Check if the specified port is invalid for dynamic allocation. 205 */ 206 int 207 in_baddynamic(u_int16_t port, u_int16_t proto) 208 { 209 switch (proto) { 210 case IPPROTO_TCP: 211 return (DP_ISSET(baddynamicports.tcp, port)); 212 case IPPROTO_UDP: 213 #ifdef IPSEC 214 /* Cannot preset this as it is a sysctl */ 215 if (port == udpencap_port) 216 return (1); 217 #endif 218 return (DP_ISSET(baddynamicports.udp, port)); 219 default: 220 return (0); 221 } 222 } 223 224 225 /* 226 * Allocate a PCB and associate it with the socket. 227 */ 228 int 229 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) 230 { 231 struct inpcb *inp; 232 #ifdef IPSEC 233 int error; 234 #endif 235 236 inp = kmalloc(pcbinfo->ipi_size, M_PCB, M_WAITOK|M_ZERO); 237 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 238 inp->inp_pcbinfo = inp->inp_cpcbinfo = pcbinfo; 239 inp->inp_socket = so; 240 #ifdef IPSEC 241 error = ipsec_init_policy(so, &inp->inp_sp); 242 if (error != 0) { 243 kfree(inp, M_PCB); 244 return (error); 245 } 246 #endif 247 #ifdef INET6 248 if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only) 249 inp->inp_flags |= IN6P_IPV6_V6ONLY; 250 if (ip6_auto_flowlabel) 251 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 252 #endif 253 soreference(so); 254 so->so_pcb = inp; 255 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 256 pcbinfo->ipi_count++; 257 return (0); 258 } 259 260 /* 261 * Unlink a pcb with the intention of moving it to another cpu with a 262 * different pcbinfo. While unlinked nothing should attempt to dereference 263 * inp_pcbinfo, NULL it out so we assert if it does. 264 */ 265 void 266 in_pcbunlink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 267 { 268 KKASSERT(inp->inp_pcbinfo == pcbinfo); 269 270 LIST_REMOVE(inp, inp_list); 271 pcbinfo->ipi_count--; 272 inp->inp_pcbinfo = NULL; 273 } 274 275 /* 276 * Relink a pcb into a new pcbinfo. 277 */ 278 void 279 in_pcblink(struct inpcb *inp, struct inpcbinfo *pcbinfo) 280 { 281 KKASSERT(inp->inp_pcbinfo == NULL); 282 inp->inp_pcbinfo = pcbinfo; 283 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 284 pcbinfo->ipi_count++; 285 } 286 287 int 288 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 289 { 290 struct socket *so = inp->inp_socket; 291 struct proc *p = td->td_proc; 292 unsigned short *lastport; 293 struct sockaddr_in *sin; 294 struct sockaddr_in jsin; 295 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 296 struct ucred *cred = NULL; 297 u_short lport = 0; 298 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 299 int error; 300 301 KKASSERT(p); 302 303 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */ 304 return (EADDRNOTAVAIL); 305 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 306 return (EINVAL); /* already bound */ 307 308 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) 309 wild = 1; /* neither SO_REUSEADDR nor SO_REUSEPORT is set */ 310 if (p) 311 cred = p->p_ucred; 312 313 /* 314 * This has to be atomic. If the porthash is shared across multiple 315 * protocol threads (aka tcp) then the token will be non-NULL. 316 */ 317 if (pcbinfo->porttoken) 318 lwkt_gettoken(pcbinfo->porttoken); 319 320 if (nam != NULL) { 321 sin = (struct sockaddr_in *)nam; 322 if (nam->sa_len != sizeof *sin) { 323 error = EINVAL; 324 goto done; 325 } 326 #ifdef notdef 327 /* 328 * We should check the family, but old programs 329 * incorrectly fail to initialize it. 330 */ 331 if (sin->sin_family != AF_INET) { 332 error = EAFNOSUPPORT; 333 goto done; 334 } 335 #endif 336 if (!prison_replace_wildcards(td, nam)) { 337 error = EINVAL; 338 goto done; 339 } 340 lport = sin->sin_port; 341 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 342 /* 343 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 344 * allow complete duplication of binding if 345 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 346 * and a multicast address is bound on both 347 * new and duplicated sockets. 348 */ 349 if (so->so_options & SO_REUSEADDR) 350 reuseport = SO_REUSEADDR | SO_REUSEPORT; 351 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 352 sin->sin_port = 0; /* yech... */ 353 bzero(&sin->sin_zero, sizeof sin->sin_zero); 354 if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL) { 355 error = EADDRNOTAVAIL; 356 goto done; 357 } 358 } 359 if (lport != 0) { 360 struct inpcb *t; 361 362 /* GROSS */ 363 if (ntohs(lport) < IPPORT_RESERVED && 364 cred && 365 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) { 366 error = EACCES; 367 goto done; 368 } 369 if (so->so_cred->cr_uid != 0 && 370 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 371 t = in_pcblookup_local(pcbinfo, 372 sin->sin_addr, 373 lport, 374 INPLOOKUP_WILDCARD, 375 cred); 376 if (t && 377 (!in_nullhost(sin->sin_addr) || 378 !in_nullhost(t->inp_laddr) || 379 (t->inp_socket->so_options & 380 SO_REUSEPORT) == 0) && 381 (so->so_cred->cr_uid != 382 t->inp_socket->so_cred->cr_uid)) { 383 #ifdef INET6 384 if (!in_nullhost(sin->sin_addr) || 385 !in_nullhost(t->inp_laddr) || 386 INP_SOCKAF(so) == 387 INP_SOCKAF(t->inp_socket)) 388 #endif 389 { 390 error = EADDRINUSE; 391 goto done; 392 } 393 } 394 } 395 if (cred && !prison_replace_wildcards(td, nam)) { 396 error = EADDRNOTAVAIL; 397 goto done; 398 } 399 t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, 400 wild, cred); 401 if (t && !(reuseport & t->inp_socket->so_options)) { 402 #ifdef INET6 403 if (!in_nullhost(sin->sin_addr) || 404 !in_nullhost(t->inp_laddr) || 405 INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) 406 #endif 407 { 408 error = EADDRINUSE; 409 goto done; 410 } 411 } 412 } 413 inp->inp_laddr = sin->sin_addr; 414 } 415 if (lport == 0) { 416 ushort first, last; 417 int count; 418 419 jsin.sin_family = AF_INET; 420 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 421 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 422 inp->inp_laddr.s_addr = INADDR_ANY; 423 error = EINVAL; 424 goto done; 425 } 426 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 427 428 inp->inp_flags |= INP_ANONPORT; 429 430 if (inp->inp_flags & INP_HIGHPORT) { 431 first = ipport_hifirstauto; /* sysctl */ 432 last = ipport_hilastauto; 433 lastport = &pcbinfo->lasthi; 434 } else if (inp->inp_flags & INP_LOWPORT) { 435 if (cred && 436 (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 437 inp->inp_laddr.s_addr = INADDR_ANY; 438 goto done; 439 } 440 first = ipport_lowfirstauto; /* 1023 */ 441 last = ipport_lowlastauto; /* 600 */ 442 lastport = &pcbinfo->lastlow; 443 } else { 444 first = ipport_firstauto; /* sysctl */ 445 last = ipport_lastauto; 446 lastport = &pcbinfo->lastport; 447 } 448 /* 449 * Simple check to ensure all ports are not used up causing 450 * a deadlock here. 451 * 452 * We split the two cases (up and down) so that the direction 453 * is not being tested on each round of the loop. 454 */ 455 if (first > last) { 456 /* 457 * counting down 458 */ 459 count = first - last; 460 461 do { 462 if (count-- < 0) { /* completely used? */ 463 inp->inp_laddr.s_addr = INADDR_ANY; 464 error = EADDRNOTAVAIL; 465 goto done; 466 } 467 --*lastport; 468 if (*lastport > first || *lastport < last) 469 *lastport = first; 470 lport = htons(*lastport); 471 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 472 lport, wild, cred)); 473 } else { 474 /* 475 * counting up 476 */ 477 count = last - first; 478 479 do { 480 if (count-- < 0) { /* completely used? */ 481 inp->inp_laddr.s_addr = INADDR_ANY; 482 error = EADDRNOTAVAIL; 483 goto done; 484 } 485 ++*lastport; 486 if (*lastport < first || *lastport > last) 487 *lastport = first; 488 lport = htons(*lastport); 489 } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, 490 lport, wild, cred)); 491 } 492 } 493 inp->inp_lport = lport; 494 495 jsin.sin_family = AF_INET; 496 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 497 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) { 498 inp->inp_laddr.s_addr = INADDR_ANY; 499 inp->inp_lport = 0; 500 error = EINVAL; 501 goto done; 502 } 503 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 504 505 if (in_pcbinsporthash(inp) != 0) { 506 inp->inp_laddr.s_addr = INADDR_ANY; 507 inp->inp_lport = 0; 508 error = EAGAIN; 509 goto done; 510 } 511 error = 0; 512 done: 513 if (pcbinfo->porttoken) 514 lwkt_reltoken(pcbinfo->porttoken); 515 return error; 516 } 517 518 static struct inpcb * 519 in_pcblookup_addrport(struct inpcbinfo *pcbinfo, struct in_addr laddr, 520 u_short lport, struct in_addr faddr, u_short fport, struct ucred *cred) 521 { 522 struct inpcb *inp; 523 struct inpcbporthead *porthash; 524 struct inpcbport *phd; 525 struct inpcb *match = NULL; 526 527 /* 528 * If the porthashbase is shared across several cpus we need 529 * to lock. 530 */ 531 if (pcbinfo->porttoken) 532 lwkt_gettoken(pcbinfo->porttoken); 533 534 /* 535 * Best fit PCB lookup. 536 * 537 * First see if this local port is in use by looking on the 538 * port hash list. 539 */ 540 porthash = &pcbinfo->porthashbase[ 541 INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; 542 LIST_FOREACH(phd, porthash, phd_hash) { 543 if (phd->phd_port == lport) 544 break; 545 } 546 if (phd != NULL) { 547 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 548 #ifdef INET6 549 if ((inp->inp_vflag & INP_IPV4) == 0) 550 continue; 551 #endif 552 if (inp->inp_laddr.s_addr != INADDR_ANY && 553 inp->inp_laddr.s_addr != laddr.s_addr) 554 continue; 555 556 if (inp->inp_faddr.s_addr != INADDR_ANY && 557 inp->inp_faddr.s_addr != faddr.s_addr) 558 continue; 559 560 if (inp->inp_fport != 0 && inp->inp_fport != fport) 561 continue; 562 563 if (cred == NULL || 564 cred->cr_prison == 565 inp->inp_socket->so_cred->cr_prison) { 566 match = inp; 567 break; 568 } 569 } 570 } 571 if (pcbinfo->porttoken) 572 lwkt_reltoken(pcbinfo->porttoken); 573 return (match); 574 } 575 576 int 577 in_pcbconn_bind(struct inpcb *inp, const struct sockaddr *nam, 578 struct thread *td) 579 { 580 struct proc *p = td->td_proc; 581 unsigned short *lastport; 582 const struct sockaddr_in *sin = (const struct sockaddr_in *)nam; 583 struct sockaddr_in jsin; 584 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 585 struct ucred *cred = NULL; 586 u_short lport = 0; 587 ushort first, last; 588 int count, error; 589 590 if (TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) /* XXX broken! */ 591 return (EADDRNOTAVAIL); 592 593 KKASSERT(inp->inp_laddr.s_addr != INADDR_ANY); 594 if (inp->inp_lport != 0) 595 return (EINVAL); /* already bound */ 596 597 KKASSERT(p); 598 cred = p->p_ucred; 599 600 /* 601 * This has to be atomic. If the porthash is shared across multiple 602 * protocol threads (aka tcp) then the token will be non-NULL. 603 */ 604 if (pcbinfo->porttoken) 605 lwkt_gettoken(pcbinfo->porttoken); 606 607 jsin.sin_family = AF_INET; 608 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 609 if (!prison_replace_wildcards(td, (struct sockaddr *)&jsin)) { 610 inp->inp_laddr.s_addr = INADDR_ANY; 611 error = EINVAL; 612 goto done; 613 } 614 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 615 616 inp->inp_flags |= INP_ANONPORT; 617 618 if (inp->inp_flags & INP_HIGHPORT) { 619 first = ipport_hifirstauto; /* sysctl */ 620 last = ipport_hilastauto; 621 lastport = &pcbinfo->lasthi; 622 } else if (inp->inp_flags & INP_LOWPORT) { 623 if (cred && 624 (error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0))) { 625 inp->inp_laddr.s_addr = INADDR_ANY; 626 goto done; 627 } 628 first = ipport_lowfirstauto; /* 1023 */ 629 last = ipport_lowlastauto; /* 600 */ 630 lastport = &pcbinfo->lastlow; 631 } else { 632 first = ipport_firstauto; /* sysctl */ 633 last = ipport_lastauto; 634 lastport = &pcbinfo->lastport; 635 } 636 /* 637 * Simple check to ensure all ports are not used up causing 638 * a deadlock here. 639 * 640 * We split the two cases (up and down) so that the direction 641 * is not being tested on each round of the loop. 642 */ 643 if (first > last) { 644 /* 645 * counting down 646 */ 647 count = first - last; 648 649 do { 650 if (count-- < 0) { /* completely used? */ 651 inp->inp_laddr.s_addr = INADDR_ANY; 652 error = EADDRNOTAVAIL; 653 goto done; 654 } 655 --*lastport; 656 if (*lastport > first || *lastport < last) 657 *lastport = first; 658 lport = htons(*lastport); 659 } while (in_pcblookup_addrport(pcbinfo, inp->inp_laddr, lport, 660 sin->sin_addr, sin->sin_port, cred)); 661 } else { 662 /* 663 * counting up 664 */ 665 count = last - first; 666 667 do { 668 if (count-- < 0) { /* completely used? */ 669 inp->inp_laddr.s_addr = INADDR_ANY; 670 error = EADDRNOTAVAIL; 671 goto done; 672 } 673 ++*lastport; 674 if (*lastport < first || *lastport > last) 675 *lastport = first; 676 lport = htons(*lastport); 677 } while (in_pcblookup_addrport(pcbinfo, inp->inp_laddr, lport, 678 sin->sin_addr, sin->sin_port, cred)); 679 } 680 inp->inp_lport = lport; 681 682 jsin.sin_family = AF_INET; 683 jsin.sin_addr.s_addr = inp->inp_laddr.s_addr; 684 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin)) { 685 inp->inp_laddr.s_addr = INADDR_ANY; 686 inp->inp_lport = 0; 687 error = EINVAL; 688 goto done; 689 } 690 inp->inp_laddr.s_addr = jsin.sin_addr.s_addr; 691 692 if (in_pcbinsporthash(inp) != 0) { 693 inp->inp_laddr.s_addr = INADDR_ANY; 694 inp->inp_lport = 0; 695 error = EAGAIN; 696 goto done; 697 } 698 error = 0; 699 done: 700 if (pcbinfo->porttoken) 701 lwkt_reltoken(pcbinfo->porttoken); 702 return error; 703 } 704 705 /* 706 * Transform old in_pcbconnect() into an inner subroutine for new 707 * in_pcbconnect(): Do some validity-checking on the remote 708 * address (in mbuf 'nam') and then determine local host address 709 * (i.e., which interface) to use to access that remote host. 710 * 711 * This preserves definition of in_pcbconnect(), while supporting a 712 * slightly different version for T/TCP. (This is more than 713 * a bit of a kludge, but cleaning up the internal interfaces would 714 * have forced minor changes in every protocol). 715 */ 716 int 717 in_pcbladdr(struct inpcb *inp, struct sockaddr *nam, 718 struct sockaddr_in **plocal_sin, struct thread *td) 719 { 720 struct in_ifaddr *ia; 721 struct ucred *cred = NULL; 722 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 723 struct sockaddr *jsin; 724 int jailed = 0, alloc_route = 0; 725 726 if (nam->sa_len != sizeof *sin) 727 return (EINVAL); 728 if (sin->sin_family != AF_INET) 729 return (EAFNOSUPPORT); 730 if (sin->sin_port == 0) 731 return (EADDRNOTAVAIL); 732 if (td && td->td_proc && td->td_proc->p_ucred) 733 cred = td->td_proc->p_ucred; 734 if (cred && cred->cr_prison) 735 jailed = 1; 736 if (!TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) { 737 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 738 /* 739 * If the destination address is INADDR_ANY, 740 * use the primary local address. 741 * If the supplied address is INADDR_BROADCAST, 742 * and the primary interface supports broadcast, 743 * choose the broadcast address for that interface. 744 */ 745 if (sin->sin_addr.s_addr == INADDR_ANY) 746 sin->sin_addr = IA_SIN(ia)->sin_addr; 747 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && 748 (ia->ia_ifp->if_flags & IFF_BROADCAST)) 749 sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr; 750 } 751 if (inp->inp_laddr.s_addr == INADDR_ANY) { 752 struct route *ro; 753 754 ia = NULL; 755 /* 756 * If route is known or can be allocated now, 757 * our src addr is taken from the i/f, else punt. 758 * Note that we should check the address family of the cached 759 * destination, in case of sharing the cache with IPv6. 760 */ 761 ro = &inp->inp_route; 762 if (ro->ro_rt && 763 (!(ro->ro_rt->rt_flags & RTF_UP) || 764 ro->ro_dst.sa_family != AF_INET || 765 satosin(&ro->ro_dst)->sin_addr.s_addr != 766 sin->sin_addr.s_addr || 767 inp->inp_socket->so_options & SO_DONTROUTE)) { 768 RTFREE(ro->ro_rt); 769 ro->ro_rt = NULL; 770 } 771 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/ 772 (ro->ro_rt == NULL || 773 ro->ro_rt->rt_ifp == NULL)) { 774 /* No route yet, so try to acquire one */ 775 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 776 ro->ro_dst.sa_family = AF_INET; 777 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 778 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 779 sin->sin_addr; 780 rtalloc(ro); 781 alloc_route = 1; 782 } 783 /* 784 * If we found a route, use the address 785 * corresponding to the outgoing interface 786 * unless it is the loopback (in case a route 787 * to our address on another net goes to loopback). 788 */ 789 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) { 790 if (jailed) { 791 if (jailed_ip(cred->cr_prison, 792 ro->ro_rt->rt_ifa->ifa_addr)) { 793 ia = ifatoia(ro->ro_rt->rt_ifa); 794 } 795 } else { 796 ia = ifatoia(ro->ro_rt->rt_ifa); 797 } 798 } 799 if (ia == NULL) { 800 u_short fport = sin->sin_port; 801 802 sin->sin_port = 0; 803 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 804 if (ia && jailed && !jailed_ip(cred->cr_prison, 805 sintosa(&ia->ia_addr))) 806 ia = NULL; 807 if (ia == NULL) 808 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 809 if (ia && jailed && !jailed_ip(cred->cr_prison, 810 sintosa(&ia->ia_addr))) 811 ia = NULL; 812 sin->sin_port = fport; 813 if (ia == NULL && 814 !TAILQ_EMPTY(&in_ifaddrheads[mycpuid])) 815 ia = TAILQ_FIRST(&in_ifaddrheads[mycpuid])->ia; 816 if (ia && jailed && !jailed_ip(cred->cr_prison, 817 sintosa(&ia->ia_addr))) 818 ia = NULL; 819 820 if (!jailed && ia == NULL) 821 goto fail; 822 } 823 /* 824 * If the destination address is multicast and an outgoing 825 * interface has been set as a multicast option, use the 826 * address of that interface as our source address. 827 */ 828 if (!jailed && IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 829 inp->inp_moptions != NULL) { 830 struct ip_moptions *imo; 831 struct ifnet *ifp; 832 833 imo = inp->inp_moptions; 834 if (imo->imo_multicast_ifp != NULL) { 835 struct in_ifaddr_container *iac; 836 837 ifp = imo->imo_multicast_ifp; 838 ia = NULL; 839 TAILQ_FOREACH(iac, 840 &in_ifaddrheads[mycpuid], ia_link) { 841 if (iac->ia->ia_ifp == ifp) { 842 ia = iac->ia; 843 break; 844 } 845 } 846 if (ia == NULL) 847 goto fail; 848 } 849 } 850 /* 851 * Don't do pcblookup call here; return interface in plocal_sin 852 * and exit to caller, that will do the lookup. 853 */ 854 if (ia == NULL && jailed) { 855 if ((jsin = prison_get_nonlocal(cred->cr_prison, AF_INET, NULL)) != NULL || 856 (jsin = prison_get_local(cred->cr_prison, AF_INET, NULL)) != NULL) { 857 *plocal_sin = satosin(jsin); 858 } else { 859 /* IPv6 only Jail */ 860 goto fail; 861 } 862 } else { 863 *plocal_sin = &ia->ia_addr; 864 } 865 } 866 return (0); 867 fail: 868 if (alloc_route) { 869 struct route *ro = &inp->inp_route; 870 871 if (ro->ro_rt != NULL) 872 RTFREE(ro->ro_rt); 873 bzero(ro, sizeof(*ro)); 874 } 875 return (EADDRNOTAVAIL); 876 } 877 878 /* 879 * Outer subroutine: 880 * Connect from a socket to a specified address. 881 * Both address and port must be specified in argument sin. 882 * If don't have a local address for this socket yet, 883 * then pick one. 884 */ 885 int 886 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 887 { 888 struct sockaddr_in *if_sin; 889 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 890 int error; 891 892 /* Call inner routine to assign local interface address. */ 893 if ((error = in_pcbladdr(inp, nam, &if_sin, td)) != 0) 894 return (error); 895 896 if (in_pcblookup_hash(inp->inp_cpcbinfo, sin->sin_addr, sin->sin_port, 897 inp->inp_laddr.s_addr ? 898 inp->inp_laddr : if_sin->sin_addr, 899 inp->inp_lport, FALSE, NULL) != NULL) { 900 return (EADDRINUSE); 901 } 902 if (inp->inp_laddr.s_addr == INADDR_ANY) { 903 if (inp->inp_lport == 0) { 904 error = in_pcbbind(inp, NULL, td); 905 if (error) 906 return (error); 907 } 908 inp->inp_laddr = if_sin->sin_addr; 909 } 910 inp->inp_faddr = sin->sin_addr; 911 inp->inp_fport = sin->sin_port; 912 in_pcbinsconnhash(inp); 913 return (0); 914 } 915 916 void 917 in_pcbdisconnect(struct inpcb *inp) 918 { 919 920 inp->inp_faddr.s_addr = INADDR_ANY; 921 inp->inp_fport = 0; 922 in_pcbremconnhash(inp); 923 if (inp->inp_socket->so_state & SS_NOFDREF) 924 in_pcbdetach(inp); 925 } 926 927 void 928 in_pcbdetach(struct inpcb *inp) 929 { 930 struct socket *so = inp->inp_socket; 931 struct inpcbinfo *ipi = inp->inp_pcbinfo; 932 933 #ifdef IPSEC 934 ipsec4_delete_pcbpolicy(inp); 935 #endif /*IPSEC*/ 936 inp->inp_gencnt = ++ipi->ipi_gencnt; 937 KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); 938 in_pcbremlists(inp); 939 so->so_pcb = NULL; 940 sofree(so); /* remove pcb ref */ 941 if (inp->inp_options) 942 m_free(inp->inp_options); 943 if (inp->inp_route.ro_rt) 944 rtfree(inp->inp_route.ro_rt); 945 ip_freemoptions(inp->inp_moptions); 946 inp->inp_vflag = 0; 947 kfree(inp, M_PCB); 948 } 949 950 /* 951 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 952 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 953 * in struct pr_usrreqs, so that protocols can just reference then directly 954 * without the need for a wrapper function. The socket must have a valid 955 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 956 * except through a kernel programming error, so it is acceptable to panic 957 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 958 * because there actually /is/ a programming error somewhere... XXX) 959 */ 960 int 961 in_setsockaddr(struct socket *so, struct sockaddr **nam) 962 { 963 struct inpcb *inp; 964 struct sockaddr_in *sin; 965 966 /* 967 * Do the malloc first in case it blocks. 968 */ 969 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 970 M_WAITOK | M_ZERO); 971 sin->sin_family = AF_INET; 972 sin->sin_len = sizeof *sin; 973 974 crit_enter(); 975 inp = so->so_pcb; 976 if (!inp) { 977 crit_exit(); 978 kfree(sin, M_SONAME); 979 return (ECONNRESET); 980 } 981 sin->sin_port = inp->inp_lport; 982 sin->sin_addr = inp->inp_laddr; 983 crit_exit(); 984 985 *nam = (struct sockaddr *)sin; 986 return (0); 987 } 988 989 void 990 in_setsockaddr_dispatch(netmsg_t msg) 991 { 992 int error; 993 994 error = in_setsockaddr(msg->base.nm_so, msg->peeraddr.nm_nam); 995 lwkt_replymsg(&msg->lmsg, error); 996 } 997 998 int 999 in_setpeeraddr(struct socket *so, struct sockaddr **nam) 1000 { 1001 struct inpcb *inp; 1002 struct sockaddr_in *sin; 1003 1004 /* 1005 * Do the malloc first in case it blocks. 1006 */ 1007 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 1008 M_WAITOK | M_ZERO); 1009 sin->sin_family = AF_INET; 1010 sin->sin_len = sizeof *sin; 1011 1012 crit_enter(); 1013 inp = so->so_pcb; 1014 if (!inp) { 1015 crit_exit(); 1016 kfree(sin, M_SONAME); 1017 return (ECONNRESET); 1018 } 1019 sin->sin_port = inp->inp_fport; 1020 sin->sin_addr = inp->inp_faddr; 1021 crit_exit(); 1022 1023 *nam = (struct sockaddr *)sin; 1024 return (0); 1025 } 1026 1027 void 1028 in_setpeeraddr_dispatch(netmsg_t msg) 1029 { 1030 int error; 1031 1032 error = in_setpeeraddr(msg->base.nm_so, msg->peeraddr.nm_nam); 1033 lwkt_replymsg(&msg->lmsg, error); 1034 } 1035 1036 void 1037 in_pcbnotifyall(struct inpcbhead *head, struct in_addr faddr, int err, 1038 void (*notify)(struct inpcb *, int)) 1039 { 1040 struct inpcb *inp, *ninp; 1041 1042 /* 1043 * note: if INP_PLACEMARKER is set we must ignore the rest of 1044 * the structure and skip it. 1045 */ 1046 crit_enter(); 1047 LIST_FOREACH_MUTABLE(inp, head, inp_list, ninp) { 1048 if (inp->inp_flags & INP_PLACEMARKER) 1049 continue; 1050 #ifdef INET6 1051 if (!(inp->inp_vflag & INP_IPV4)) 1052 continue; 1053 #endif 1054 if (inp->inp_faddr.s_addr != faddr.s_addr || 1055 inp->inp_socket == NULL) 1056 continue; 1057 (*notify)(inp, err); /* can remove inp from list! */ 1058 } 1059 crit_exit(); 1060 } 1061 1062 void 1063 in_pcbpurgeif0(struct inpcb *head, struct ifnet *ifp) 1064 { 1065 struct inpcb *inp; 1066 struct ip_moptions *imo; 1067 int i, gap; 1068 1069 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) { 1070 if (inp->inp_flags & INP_PLACEMARKER) 1071 continue; 1072 imo = inp->inp_moptions; 1073 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { 1074 /* 1075 * Unselect the outgoing interface if it is being 1076 * detached. 1077 */ 1078 if (imo->imo_multicast_ifp == ifp) 1079 imo->imo_multicast_ifp = NULL; 1080 1081 /* 1082 * Drop multicast group membership if we joined 1083 * through the interface being detached. 1084 */ 1085 for (i = 0, gap = 0; i < imo->imo_num_memberships; 1086 i++) { 1087 if (imo->imo_membership[i]->inm_ifp == ifp) { 1088 in_delmulti(imo->imo_membership[i]); 1089 gap++; 1090 } else if (gap != 0) 1091 imo->imo_membership[i - gap] = 1092 imo->imo_membership[i]; 1093 } 1094 imo->imo_num_memberships -= gap; 1095 } 1096 } 1097 } 1098 1099 /* 1100 * Check for alternatives when higher level complains 1101 * about service problems. For now, invalidate cached 1102 * routing information. If the route was created dynamically 1103 * (by a redirect), time to try a default gateway again. 1104 */ 1105 void 1106 in_losing(struct inpcb *inp) 1107 { 1108 struct rtentry *rt; 1109 struct rt_addrinfo rtinfo; 1110 1111 if ((rt = inp->inp_route.ro_rt)) { 1112 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 1113 rtinfo.rti_info[RTAX_DST] = rt_key(rt); 1114 rtinfo.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 1115 rtinfo.rti_info[RTAX_NETMASK] = rt_mask(rt); 1116 rtinfo.rti_flags = rt->rt_flags; 1117 rt_missmsg(RTM_LOSING, &rtinfo, rt->rt_flags, 0); 1118 if (rt->rt_flags & RTF_DYNAMIC) 1119 rtrequest1_global(RTM_DELETE, &rtinfo, NULL, NULL); 1120 inp->inp_route.ro_rt = NULL; 1121 rtfree(rt); 1122 /* 1123 * A new route can be allocated 1124 * the next time output is attempted. 1125 */ 1126 } 1127 } 1128 1129 /* 1130 * After a routing change, flush old routing 1131 * and allocate a (hopefully) better one. 1132 */ 1133 void 1134 in_rtchange(struct inpcb *inp, int err) 1135 { 1136 if (inp->inp_route.ro_rt) { 1137 rtfree(inp->inp_route.ro_rt); 1138 inp->inp_route.ro_rt = NULL; 1139 /* 1140 * A new route can be allocated the next time 1141 * output is attempted. 1142 */ 1143 } 1144 } 1145 1146 /* 1147 * Lookup a PCB based on the local address and port. 1148 */ 1149 struct inpcb * 1150 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, 1151 u_int lport_arg, int wild_okay, struct ucred *cred) 1152 { 1153 struct inpcb *inp; 1154 int matchwild = 3, wildcard; 1155 u_short lport = lport_arg; 1156 struct inpcbporthead *porthash; 1157 struct inpcbport *phd; 1158 struct inpcb *match = NULL; 1159 1160 /* 1161 * If the porthashbase is shared across several cpus we need 1162 * to lock. 1163 */ 1164 if (pcbinfo->porttoken) 1165 lwkt_gettoken(pcbinfo->porttoken); 1166 1167 /* 1168 * Best fit PCB lookup. 1169 * 1170 * First see if this local port is in use by looking on the 1171 * port hash list. 1172 */ 1173 porthash = &pcbinfo->porthashbase[ 1174 INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; 1175 LIST_FOREACH(phd, porthash, phd_hash) { 1176 if (phd->phd_port == lport) 1177 break; 1178 } 1179 if (phd != NULL) { 1180 /* 1181 * Port is in use by one or more PCBs. Look for best 1182 * fit. 1183 */ 1184 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 1185 wildcard = 0; 1186 #ifdef INET6 1187 if ((inp->inp_vflag & INP_IPV4) == 0) 1188 continue; 1189 #endif 1190 if (inp->inp_faddr.s_addr != INADDR_ANY) 1191 wildcard++; 1192 if (inp->inp_laddr.s_addr != INADDR_ANY) { 1193 if (laddr.s_addr == INADDR_ANY) 1194 wildcard++; 1195 else if (inp->inp_laddr.s_addr != laddr.s_addr) 1196 continue; 1197 } else { 1198 if (laddr.s_addr != INADDR_ANY) 1199 wildcard++; 1200 } 1201 if (wildcard && !wild_okay) 1202 continue; 1203 if (wildcard < matchwild && 1204 (cred == NULL || 1205 cred->cr_prison == 1206 inp->inp_socket->so_cred->cr_prison)) { 1207 match = inp; 1208 matchwild = wildcard; 1209 if (matchwild == 0) { 1210 break; 1211 } 1212 } 1213 } 1214 } 1215 if (pcbinfo->porttoken) 1216 lwkt_reltoken(pcbinfo->porttoken); 1217 return (match); 1218 } 1219 1220 /* 1221 * Lookup PCB in hash list. 1222 */ 1223 struct inpcb * 1224 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, 1225 u_int fport_arg, struct in_addr laddr, u_int lport_arg, 1226 boolean_t wildcard, struct ifnet *ifp) 1227 { 1228 struct inpcbhead *head; 1229 struct inpcb *inp, *jinp=NULL; 1230 u_short fport = fport_arg, lport = lport_arg; 1231 1232 /* 1233 * First look for an exact match. 1234 */ 1235 head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport, 1236 laddr.s_addr, lport, pcbinfo->hashmask)]; 1237 LIST_FOREACH(inp, head, inp_hash) { 1238 #ifdef INET6 1239 if (!(inp->inp_vflag & INP_IPV4)) 1240 continue; 1241 #endif 1242 if (in_hosteq(inp->inp_faddr, faddr) && 1243 in_hosteq(inp->inp_laddr, laddr) && 1244 inp->inp_fport == fport && inp->inp_lport == lport) { 1245 /* found */ 1246 if (inp->inp_socket == NULL || 1247 inp->inp_socket->so_cred->cr_prison == NULL) { 1248 return (inp); 1249 } else { 1250 if (jinp == NULL) 1251 jinp = inp; 1252 } 1253 } 1254 } 1255 if (jinp != NULL) 1256 return (jinp); 1257 if (wildcard) { 1258 struct inpcb *local_wild = NULL; 1259 struct inpcb *jinp_wild = NULL; 1260 #ifdef INET6 1261 struct inpcb *local_wild_mapped = NULL; 1262 #endif 1263 struct inpcontainer *ic; 1264 struct inpcontainerhead *chead; 1265 struct sockaddr_in jsin; 1266 struct ucred *cred; 1267 1268 /* 1269 * Order of socket selection: 1270 * 1. non-jailed, non-wild. 1271 * 2. non-jailed, wild. 1272 * 3. jailed, non-wild. 1273 * 4. jailed, wild. 1274 */ 1275 jsin.sin_family = AF_INET; 1276 chead = &pcbinfo->wildcardhashbase[ 1277 INP_PCBWILDCARDHASH(lport, pcbinfo->wildcardhashmask)]; 1278 LIST_FOREACH(ic, chead, ic_list) { 1279 inp = ic->ic_inp; 1280 jsin.sin_addr.s_addr = laddr.s_addr; 1281 #ifdef INET6 1282 if (!(inp->inp_vflag & INP_IPV4)) 1283 continue; 1284 #endif 1285 if (inp->inp_socket != NULL) 1286 cred = inp->inp_socket->so_cred; 1287 else 1288 cred = NULL; 1289 if (cred != NULL && jailed(cred)) { 1290 if (jinp != NULL) 1291 continue; 1292 else 1293 if (!jailed_ip(cred->cr_prison, 1294 (struct sockaddr *)&jsin)) 1295 continue; 1296 } 1297 if (inp->inp_lport == lport) { 1298 if (ifp && ifp->if_type == IFT_FAITH && 1299 !(inp->inp_flags & INP_FAITH)) 1300 continue; 1301 if (inp->inp_laddr.s_addr == laddr.s_addr) { 1302 if (cred != NULL && jailed(cred)) 1303 jinp = inp; 1304 else 1305 return (inp); 1306 } 1307 if (inp->inp_laddr.s_addr == INADDR_ANY) { 1308 #ifdef INET6 1309 if (INP_CHECK_SOCKAF(inp->inp_socket, 1310 AF_INET6)) 1311 local_wild_mapped = inp; 1312 else 1313 #endif 1314 if (cred != NULL && 1315 jailed(cred)) 1316 jinp_wild = inp; 1317 else 1318 local_wild = inp; 1319 } 1320 } 1321 } 1322 if (local_wild != NULL) 1323 return (local_wild); 1324 #ifdef INET6 1325 if (local_wild_mapped != NULL) 1326 return (local_wild_mapped); 1327 #endif 1328 if (jinp != NULL) 1329 return (jinp); 1330 return (jinp_wild); 1331 } 1332 1333 /* 1334 * Not found. 1335 */ 1336 return (NULL); 1337 } 1338 1339 /* 1340 * Insert PCB into connection hash table. 1341 */ 1342 void 1343 in_pcbinsconnhash(struct inpcb *inp) 1344 { 1345 struct inpcbinfo *pcbinfo = inp->inp_cpcbinfo; 1346 struct inpcbhead *bucket; 1347 u_int32_t hashkey_faddr, hashkey_laddr; 1348 1349 #ifdef INET6 1350 if (inp->inp_vflag & INP_IPV6) { 1351 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */; 1352 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */; 1353 } else { 1354 #endif 1355 hashkey_faddr = inp->inp_faddr.s_addr; 1356 hashkey_laddr = inp->inp_laddr.s_addr; 1357 #ifdef INET6 1358 } 1359 #endif 1360 1361 KASSERT(!(inp->inp_flags & INP_WILDCARD), 1362 ("already on wildcardhash\n")); 1363 KASSERT(!(inp->inp_flags & INP_CONNECTED), 1364 ("already on connhash\n")); 1365 inp->inp_flags |= INP_CONNECTED; 1366 1367 /* 1368 * Insert into the connection hash table. 1369 */ 1370 bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr, 1371 inp->inp_fport, hashkey_laddr, inp->inp_lport, pcbinfo->hashmask)]; 1372 LIST_INSERT_HEAD(bucket, inp, inp_hash); 1373 } 1374 1375 /* 1376 * Remove PCB from connection hash table. 1377 */ 1378 void 1379 in_pcbremconnhash(struct inpcb *inp) 1380 { 1381 KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected")); 1382 LIST_REMOVE(inp, inp_hash); 1383 inp->inp_flags &= ~INP_CONNECTED; 1384 } 1385 1386 /* 1387 * Insert PCB into port hash table. 1388 */ 1389 int 1390 in_pcbinsporthash(struct inpcb *inp) 1391 { 1392 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1393 struct inpcbporthead *pcbporthash; 1394 struct inpcbport *phd; 1395 1396 /* 1397 * If the porthashbase is shared across several cpus we need 1398 * to lock. 1399 */ 1400 if (pcbinfo->porttoken) 1401 lwkt_gettoken(pcbinfo->porttoken); 1402 1403 /* 1404 * Insert into the port hash table. 1405 */ 1406 pcbporthash = &pcbinfo->porthashbase[ 1407 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; 1408 1409 /* Go through port list and look for a head for this lport. */ 1410 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1411 if (phd->phd_port == inp->inp_lport) 1412 break; 1413 } 1414 1415 /* If none exists, malloc one and tack it on. */ 1416 if (phd == NULL) { 1417 KKASSERT(pcbinfo->portsave != NULL); 1418 phd = pcbinfo->portsave; 1419 pcbinfo->portsave = NULL; 1420 phd->phd_port = inp->inp_lport; 1421 LIST_INIT(&phd->phd_pcblist); 1422 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1423 } 1424 1425 inp->inp_phd = phd; 1426 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1427 1428 if (pcbinfo->porttoken) 1429 lwkt_reltoken(pcbinfo->porttoken); 1430 if (pcbinfo->portsave == NULL) { 1431 pcbinfo->portsave = kmalloc(sizeof(*pcbinfo->portsave), 1432 M_PCB, M_INTWAIT | M_ZERO); 1433 } 1434 return (0); 1435 } 1436 1437 void 1438 in_pcbinswildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1439 { 1440 struct inpcontainer *ic; 1441 struct inpcontainerhead *bucket; 1442 1443 bucket = &pcbinfo->wildcardhashbase[ 1444 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1445 1446 ic = kmalloc(sizeof(struct inpcontainer), M_TEMP, M_INTWAIT); 1447 ic->ic_inp = inp; 1448 LIST_INSERT_HEAD(bucket, ic, ic_list); 1449 } 1450 1451 /* 1452 * Insert PCB into wildcard hash table. 1453 */ 1454 void 1455 in_pcbinswildcardhash(struct inpcb *inp) 1456 { 1457 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1458 1459 KASSERT(!(inp->inp_flags & INP_CONNECTED), 1460 ("already on connhash\n")); 1461 KASSERT(!(inp->inp_flags & INP_WILDCARD), 1462 ("already on wildcardhash\n")); 1463 inp->inp_flags |= INP_WILDCARD; 1464 1465 in_pcbinswildcardhash_oncpu(inp, pcbinfo); 1466 } 1467 1468 void 1469 in_pcbremwildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1470 { 1471 struct inpcontainer *ic; 1472 struct inpcontainerhead *head; 1473 1474 /* find bucket */ 1475 head = &pcbinfo->wildcardhashbase[ 1476 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1477 1478 LIST_FOREACH(ic, head, ic_list) { 1479 if (ic->ic_inp == inp) 1480 goto found; 1481 } 1482 return; /* not found! */ 1483 1484 found: 1485 LIST_REMOVE(ic, ic_list); /* remove container from bucket chain */ 1486 kfree(ic, M_TEMP); /* deallocate container */ 1487 } 1488 1489 /* 1490 * Remove PCB from wildcard hash table. 1491 */ 1492 void 1493 in_pcbremwildcardhash(struct inpcb *inp) 1494 { 1495 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1496 1497 KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard")); 1498 in_pcbremwildcardhash_oncpu(inp, pcbinfo); 1499 inp->inp_flags &= ~INP_WILDCARD; 1500 } 1501 1502 /* 1503 * Remove PCB from various lists. 1504 */ 1505 void 1506 in_pcbremlists(struct inpcb *inp) 1507 { 1508 struct inpcbinfo *pcbinfo; 1509 1510 if (inp->inp_lport) { 1511 struct inpcbport *phd; 1512 1513 pcbinfo = inp->inp_pcbinfo; 1514 if (pcbinfo->porttoken) 1515 lwkt_gettoken(pcbinfo->porttoken); 1516 1517 phd = inp->inp_phd; 1518 LIST_REMOVE(inp, inp_portlist); 1519 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1520 LIST_REMOVE(phd, phd_hash); 1521 kfree(phd, M_PCB); 1522 } 1523 if (pcbinfo->porttoken) 1524 lwkt_reltoken(pcbinfo->porttoken); 1525 } 1526 if (inp->inp_flags & INP_WILDCARD) { 1527 in_pcbremwildcardhash(inp); 1528 } else if (inp->inp_flags & INP_CONNECTED) { 1529 in_pcbremconnhash(inp); 1530 } 1531 LIST_REMOVE(inp, inp_list); 1532 inp->inp_pcbinfo->ipi_count--; 1533 } 1534 1535 int 1536 prison_xinpcb(struct thread *td, struct inpcb *inp) 1537 { 1538 struct ucred *cr; 1539 1540 if (td->td_proc == NULL) 1541 return (0); 1542 cr = td->td_proc->p_ucred; 1543 if (cr->cr_prison == NULL) 1544 return (0); 1545 if (inp->inp_socket && inp->inp_socket->so_cred && 1546 inp->inp_socket->so_cred->cr_prison && 1547 cr->cr_prison == inp->inp_socket->so_cred->cr_prison) 1548 return (0); 1549 return (1); 1550 } 1551 1552 int 1553 in_pcblist_global(SYSCTL_HANDLER_ARGS) 1554 { 1555 struct inpcbinfo *pcbinfo = arg1; 1556 struct inpcb *inp, *marker; 1557 struct xinpcb xi; 1558 int error, i, n; 1559 1560 /* 1561 * The process of preparing the TCB list is too time-consuming and 1562 * resource-intensive to repeat twice on every request. 1563 */ 1564 if (req->oldptr == NULL) { 1565 n = pcbinfo->ipi_count; 1566 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1567 return 0; 1568 } 1569 1570 if (req->newptr != NULL) 1571 return EPERM; 1572 1573 /* 1574 * OK, now we're committed to doing something. Re-fetch ipi_count 1575 * after obtaining the generation count. 1576 */ 1577 n = pcbinfo->ipi_count; 1578 1579 marker = kmalloc(sizeof(struct inpcb), M_TEMP, M_WAITOK|M_ZERO); 1580 marker->inp_flags |= INP_PLACEMARKER; 1581 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 1582 1583 i = 0; 1584 error = 0; 1585 1586 while ((inp = LIST_NEXT(marker, inp_list)) != NULL && i < n) { 1587 LIST_REMOVE(marker, inp_list); 1588 LIST_INSERT_AFTER(inp, marker, inp_list); 1589 1590 if (inp->inp_flags & INP_PLACEMARKER) 1591 continue; 1592 if (prison_xinpcb(req->td, inp)) 1593 continue; 1594 bzero(&xi, sizeof xi); 1595 xi.xi_len = sizeof xi; 1596 bcopy(inp, &xi.xi_inp, sizeof *inp); 1597 if (inp->inp_socket) 1598 sotoxsocket(inp->inp_socket, &xi.xi_socket); 1599 if ((error = SYSCTL_OUT(req, &xi, sizeof xi)) != 0) 1600 break; 1601 ++i; 1602 } 1603 LIST_REMOVE(marker, inp_list); 1604 if (error == 0 && i < n) { 1605 bzero(&xi, sizeof xi); 1606 xi.xi_len = sizeof xi; 1607 while (i < n) { 1608 error = SYSCTL_OUT(req, &xi, sizeof xi); 1609 ++i; 1610 } 1611 } 1612 kfree(marker, M_TEMP); 1613 return(error); 1614 } 1615 1616 int 1617 in_pcblist_global_nomarker(SYSCTL_HANDLER_ARGS, struct xinpcb **xi0, int *nxi0) 1618 { 1619 struct inpcbinfo *pcbinfo = arg1; 1620 struct inpcb *inp; 1621 struct xinpcb *xi; 1622 int nxi; 1623 1624 *nxi0 = 0; 1625 *xi0 = NULL; 1626 1627 /* 1628 * The process of preparing the PCB list is too time-consuming and 1629 * resource-intensive to repeat twice on every request. 1630 */ 1631 if (req->oldptr == NULL) { 1632 int n = pcbinfo->ipi_count; 1633 1634 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1635 return 0; 1636 } 1637 1638 if (req->newptr != NULL) 1639 return EPERM; 1640 1641 if (pcbinfo->ipi_count == 0) 1642 return 0; 1643 1644 nxi = 0; 1645 xi = kmalloc(pcbinfo->ipi_count * sizeof(*xi), M_TEMP, 1646 M_WAITOK | M_ZERO | M_NULLOK); 1647 if (xi == NULL) 1648 return ENOMEM; 1649 1650 LIST_FOREACH(inp, &pcbinfo->pcblisthead, inp_list) { 1651 struct xinpcb *xi_ptr = &xi[nxi]; 1652 1653 if (prison_xinpcb(req->td, inp)) 1654 continue; 1655 1656 xi_ptr->xi_len = sizeof(*xi_ptr); 1657 bcopy(inp, &xi_ptr->xi_inp, sizeof(*inp)); 1658 if (inp->inp_socket) 1659 sotoxsocket(inp->inp_socket, &xi_ptr->xi_socket); 1660 ++nxi; 1661 } 1662 1663 if (nxi == 0) { 1664 kfree(xi, M_TEMP); 1665 return 0; 1666 } 1667 1668 *nxi0 = nxi; 1669 *xi0 = xi; 1670 1671 return 0; 1672 } 1673