1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 36 * 37 * License terms: all terms for the DragonFly license above plus the following: 38 * 39 * 4. All advertising materials mentioning features or use of this software 40 * must display the following acknowledgement: 41 * 42 * This product includes software developed by Jeffrey M. Hsu 43 * for the DragonFly Project. 44 * 45 * This requirement may be waived with permission from Jeffrey Hsu. 46 * This requirement will sunset and may be removed on July 8 2005, 47 * after which the standard DragonFly license (as shown above) will 48 * apply. 49 */ 50 51 /* 52 * Copyright (c) 1982, 1986, 1991, 1993, 1995 53 * The Regents of the University of California. All rights reserved. 54 * 55 * Redistribution and use in source and binary forms, with or without 56 * modification, are permitted provided that the following conditions 57 * are met: 58 * 1. Redistributions of source code must retain the above copyright 59 * notice, this list of conditions and the following disclaimer. 60 * 2. Redistributions in binary form must reproduce the above copyright 61 * notice, this list of conditions and the following disclaimer in the 62 * documentation and/or other materials provided with the distribution. 63 * 3. All advertising materials mentioning features or use of this software 64 * must display the following acknowledgement: 65 * This product includes software developed by the University of 66 * California, Berkeley and its contributors. 67 * 4. Neither the name of the University nor the names of its contributors 68 * may be used to endorse or promote products derived from this software 69 * without specific prior written permission. 70 * 71 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 72 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 73 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 74 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 75 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 76 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 77 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 78 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 79 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 80 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 81 * SUCH DAMAGE. 82 * 83 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 84 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $ 85 * $DragonFly: src/sys/netinet/in_pcb.c,v 1.28 2004/12/20 11:03:16 joerg Exp $ 86 */ 87 88 #include "opt_ipsec.h" 89 #include "opt_inet6.h" 90 91 #include <sys/param.h> 92 #include <sys/systm.h> 93 #include <sys/malloc.h> 94 #include <sys/mbuf.h> 95 #include <sys/domain.h> 96 #include <sys/protosw.h> 97 #include <sys/socket.h> 98 #include <sys/socketvar.h> 99 #include <sys/proc.h> 100 #include <sys/jail.h> 101 #include <sys/kernel.h> 102 #include <sys/sysctl.h> 103 104 #include <machine/limits.h> 105 106 #include <vm/vm_zone.h> 107 108 #include <net/if.h> 109 #include <net/if_types.h> 110 #include <net/route.h> 111 112 #include <netinet/in.h> 113 #include <netinet/in_pcb.h> 114 #include <netinet/in_var.h> 115 #include <netinet/ip_var.h> 116 #ifdef INET6 117 #include <netinet/ip6.h> 118 #include <netinet6/ip6_var.h> 119 #endif /* INET6 */ 120 121 #ifdef IPSEC 122 #include <netinet6/ipsec.h> 123 #include <netproto/key/key.h> 124 #endif 125 126 #ifdef FAST_IPSEC 127 #if defined(IPSEC) || defined(IPSEC_ESP) 128 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 129 #endif 130 131 #include <netproto/ipsec/ipsec.h> 132 #include <netproto/ipsec/key.h> 133 #define IPSEC 134 #endif /* FAST_IPSEC */ 135 136 struct in_addr zeroin_addr; 137 138 /* 139 * These configure the range of local port addresses assigned to 140 * "unspecified" outgoing connections/packets/whatever. 141 */ 142 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 143 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 144 145 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 146 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 147 148 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 149 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 150 151 /* Allocate ephermal source ports in random order. */ 152 int ipport_randomized = 1; 153 154 static __inline void 155 RANGECHK(int var, int min, int max) 156 { 157 if (var < min) 158 var = min; 159 else if (var > max) 160 var = max; 161 } 162 163 static int 164 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 165 { 166 int error; 167 168 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); 169 if (!error) { 170 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 171 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 172 173 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 174 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 175 176 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 177 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 178 } 179 return (error); 180 } 181 182 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 183 184 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 185 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 186 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 187 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 188 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 189 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 190 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 191 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 192 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 193 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 194 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 195 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 196 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW, 197 &ipport_randomized, 0, ""); 198 199 /* 200 * in_pcb.c: manage the Protocol Control Blocks. 201 * 202 * NOTE: It is assumed that most of these functions will be called at 203 * splnet(). XXX - There are, unfortunately, a few exceptions to this 204 * rule that should be fixed. 205 * 206 * NOTE: The caller should initialize the cpu field to the cpu running the 207 * protocol stack associated with this inpcbinfo. 208 */ 209 210 void 211 in_pcbinfo_init(struct inpcbinfo *pcbinfo) 212 { 213 LIST_INIT(&pcbinfo->pcblisthead); 214 pcbinfo->cpu = -1; 215 } 216 217 /* 218 * Allocate a PCB and associate it with the socket. 219 */ 220 int 221 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) 222 { 223 struct inpcb *inp; 224 #ifdef IPSEC 225 int error; 226 #endif 227 228 inp = zalloc(pcbinfo->ipi_zone); 229 if (inp == NULL) 230 return (ENOBUFS); 231 bzero((caddr_t)inp, sizeof *inp); 232 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 233 inp->inp_pcbinfo = inp->inp_cpcbinfo = pcbinfo; 234 inp->inp_socket = so; 235 #ifdef IPSEC 236 error = ipsec_init_policy(so, &inp->inp_sp); 237 if (error != 0) { 238 zfree(pcbinfo->ipi_zone, inp); 239 return (error); 240 } 241 #endif 242 #ifdef INET6 243 if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only) 244 inp->inp_flags |= IN6P_IPV6_V6ONLY; 245 if (ip6_auto_flowlabel) 246 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 247 #endif 248 so->so_pcb = (caddr_t)inp; 249 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 250 pcbinfo->ipi_count++; 251 return (0); 252 } 253 254 int 255 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 256 { 257 struct socket *so = inp->inp_socket; 258 struct proc *p = td->td_proc; 259 unsigned short *lastport; 260 struct sockaddr_in *sin; 261 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 262 u_short lport = 0; 263 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 264 int error, prison = 0; 265 266 KKASSERT(p); 267 268 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ 269 return (EADDRNOTAVAIL); 270 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 271 return (EINVAL); /* already bound */ 272 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) 273 wild = 1; /* neither SO_REUSEADDR nor SO_REUSEPORT is set */ 274 if (nam != NULL) { 275 sin = (struct sockaddr_in *)nam; 276 if (nam->sa_len != sizeof *sin) 277 return (EINVAL); 278 #ifdef notdef 279 /* 280 * We should check the family, but old programs 281 * incorrectly fail to initialize it. 282 */ 283 if (sin->sin_family != AF_INET) 284 return (EAFNOSUPPORT); 285 #endif 286 if (sin->sin_addr.s_addr != INADDR_ANY && 287 prison_ip(td, 0, &sin->sin_addr.s_addr)) 288 return (EINVAL); 289 lport = sin->sin_port; 290 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 291 /* 292 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 293 * allow complete duplication of binding if 294 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 295 * and a multicast address is bound on both 296 * new and duplicated sockets. 297 */ 298 if (so->so_options & SO_REUSEADDR) 299 reuseport = SO_REUSEADDR | SO_REUSEPORT; 300 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 301 sin->sin_port = 0; /* yech... */ 302 bzero(&sin->sin_zero, sizeof sin->sin_zero); 303 if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL) 304 return (EADDRNOTAVAIL); 305 } 306 if (lport != 0) { 307 struct inpcb *t; 308 309 /* GROSS */ 310 if (ntohs(lport) < IPPORT_RESERVED && 311 p && suser_cred(p->p_ucred, PRISON_ROOT)) 312 return (EACCES); 313 if (p && p->p_ucred->cr_prison) 314 prison = 1; 315 if (so->so_cred->cr_uid != 0 && 316 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 317 t = in_pcblookup_local(inp->inp_pcbinfo, 318 sin->sin_addr, lport, 319 prison ? 0 : INPLOOKUP_WILDCARD); 320 if (t && 321 (!in_nullhost(sin->sin_addr) || 322 !in_nullhost(t->inp_laddr) || 323 (t->inp_socket->so_options & 324 SO_REUSEPORT) == 0) && 325 (so->so_cred->cr_uid != 326 t->inp_socket->so_cred->cr_uid)) { 327 #ifdef INET6 328 if (!in_nullhost(sin->sin_addr) || 329 !in_nullhost(t->inp_laddr) || 330 INP_SOCKAF(so) == 331 INP_SOCKAF(t->inp_socket)) 332 #endif 333 return (EADDRINUSE); 334 } 335 } 336 if (prison && prison_ip(td, 0, &sin->sin_addr.s_addr)) 337 return (EADDRNOTAVAIL); 338 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 339 lport, prison ? 0 : wild); 340 if (t && !(reuseport & t->inp_socket->so_options)) { 341 #ifdef INET6 342 if (!in_nullhost(sin->sin_addr) || 343 !in_nullhost(t->inp_laddr) || 344 INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) 345 #endif 346 return (EADDRINUSE); 347 } 348 } 349 inp->inp_laddr = sin->sin_addr; 350 } 351 if (lport == 0) { 352 ushort first, last; 353 int count; 354 355 if (inp->inp_laddr.s_addr != INADDR_ANY && 356 prison_ip(td, 0, &inp->inp_laddr.s_addr )) { 357 inp->inp_laddr.s_addr = INADDR_ANY; 358 return (EINVAL); 359 } 360 inp->inp_flags |= INP_ANONPORT; 361 362 if (inp->inp_flags & INP_HIGHPORT) { 363 first = ipport_hifirstauto; /* sysctl */ 364 last = ipport_hilastauto; 365 lastport = &pcbinfo->lasthi; 366 } else if (inp->inp_flags & INP_LOWPORT) { 367 if (p && 368 (error = suser_cred(p->p_ucred, PRISON_ROOT))) { 369 inp->inp_laddr.s_addr = INADDR_ANY; 370 return (error); 371 } 372 first = ipport_lowfirstauto; /* 1023 */ 373 last = ipport_lowlastauto; /* 600 */ 374 lastport = &pcbinfo->lastlow; 375 } else { 376 first = ipport_firstauto; /* sysctl */ 377 last = ipport_lastauto; 378 lastport = &pcbinfo->lastport; 379 } 380 /* 381 * Simple check to ensure all ports are not used up causing 382 * a deadlock here. 383 * 384 * We split the two cases (up and down) so that the direction 385 * is not being tested on each round of the loop. 386 */ 387 if (first > last) { 388 /* 389 * counting down 390 */ 391 if (ipport_randomized) 392 *lastport = first - 393 (arc4random() % (first - last)); 394 count = first - last; 395 396 do { 397 if (count-- < 0) { /* completely used? */ 398 inp->inp_laddr.s_addr = INADDR_ANY; 399 return (EADDRNOTAVAIL); 400 } 401 --*lastport; 402 if (*lastport > first || *lastport < last) 403 *lastport = first; 404 lport = htons(*lastport); 405 } while (in_pcblookup_local(pcbinfo, 406 inp->inp_laddr, lport, wild)); 407 } else { 408 /* 409 * counting up 410 */ 411 if (ipport_randomized) 412 *lastport = first + 413 (arc4random() % (last - first)); 414 count = last - first; 415 416 do { 417 if (count-- < 0) { /* completely used? */ 418 inp->inp_laddr.s_addr = INADDR_ANY; 419 return (EADDRNOTAVAIL); 420 } 421 ++*lastport; 422 if (*lastport < first || *lastport > last) 423 *lastport = first; 424 lport = htons(*lastport); 425 } while (in_pcblookup_local(pcbinfo, 426 inp->inp_laddr, lport, wild)); 427 } 428 } 429 inp->inp_lport = lport; 430 if (prison_ip(td, 0, &inp->inp_laddr.s_addr)) { 431 inp->inp_laddr.s_addr = INADDR_ANY; 432 inp->inp_lport = 0; 433 return (EINVAL); 434 } 435 if (in_pcbinsporthash(inp) != 0) { 436 inp->inp_laddr.s_addr = INADDR_ANY; 437 inp->inp_lport = 0; 438 return (EAGAIN); 439 } 440 return (0); 441 } 442 443 /* 444 * Transform old in_pcbconnect() into an inner subroutine for new 445 * in_pcbconnect(): Do some validity-checking on the remote 446 * address (in mbuf 'nam') and then determine local host address 447 * (i.e., which interface) to use to access that remote host. 448 * 449 * This preserves definition of in_pcbconnect(), while supporting a 450 * slightly different version for T/TCP. (This is more than 451 * a bit of a kludge, but cleaning up the internal interfaces would 452 * have forced minor changes in every protocol). 453 */ 454 int 455 in_pcbladdr(inp, nam, plocal_sin) 456 struct inpcb *inp; 457 struct sockaddr *nam; 458 struct sockaddr_in **plocal_sin; 459 { 460 struct in_ifaddr *ia; 461 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 462 463 if (nam->sa_len != sizeof *sin) 464 return (EINVAL); 465 if (sin->sin_family != AF_INET) 466 return (EAFNOSUPPORT); 467 if (sin->sin_port == 0) 468 return (EADDRNOTAVAIL); 469 if (!TAILQ_EMPTY(&in_ifaddrhead)) { 470 ia = TAILQ_FIRST(&in_ifaddrhead); 471 /* 472 * If the destination address is INADDR_ANY, 473 * use the primary local address. 474 * If the supplied address is INADDR_BROADCAST, 475 * and the primary interface supports broadcast, 476 * choose the broadcast address for that interface. 477 */ 478 if (sin->sin_addr.s_addr == INADDR_ANY) 479 sin->sin_addr = IA_SIN(ia)->sin_addr; 480 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && 481 (ia->ia_ifp->if_flags & IFF_BROADCAST)) 482 sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr; 483 } 484 if (inp->inp_laddr.s_addr == INADDR_ANY) { 485 struct route *ro; 486 487 ia = (struct in_ifaddr *)NULL; 488 /* 489 * If route is known or can be allocated now, 490 * our src addr is taken from the i/f, else punt. 491 * Note that we should check the address family of the cached 492 * destination, in case of sharing the cache with IPv6. 493 */ 494 ro = &inp->inp_route; 495 if (ro->ro_rt && 496 (!(ro->ro_rt->rt_flags & RTF_UP) || 497 ro->ro_dst.sa_family != AF_INET || 498 satosin(&ro->ro_dst)->sin_addr.s_addr != 499 sin->sin_addr.s_addr || 500 inp->inp_socket->so_options & SO_DONTROUTE)) { 501 RTFREE(ro->ro_rt); 502 ro->ro_rt = (struct rtentry *)NULL; 503 } 504 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/ 505 (ro->ro_rt == (struct rtentry *)NULL || 506 ro->ro_rt->rt_ifp == (struct ifnet *)NULL)) { 507 /* No route yet, so try to acquire one */ 508 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 509 ro->ro_dst.sa_family = AF_INET; 510 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 511 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 512 sin->sin_addr; 513 rtalloc(ro); 514 } 515 /* 516 * If we found a route, use the address 517 * corresponding to the outgoing interface 518 * unless it is the loopback (in case a route 519 * to our address on another net goes to loopback). 520 */ 521 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 522 ia = ifatoia(ro->ro_rt->rt_ifa); 523 if (ia == NULL) { 524 u_short fport = sin->sin_port; 525 526 sin->sin_port = 0; 527 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 528 if (ia == NULL) 529 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 530 sin->sin_port = fport; 531 if (ia == NULL) 532 ia = TAILQ_FIRST(&in_ifaddrhead); 533 if (ia == NULL) 534 return (EADDRNOTAVAIL); 535 } 536 /* 537 * If the destination address is multicast and an outgoing 538 * interface has been set as a multicast option, use the 539 * address of that interface as our source address. 540 */ 541 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 542 inp->inp_moptions != NULL) { 543 struct ip_moptions *imo; 544 struct ifnet *ifp; 545 546 imo = inp->inp_moptions; 547 if (imo->imo_multicast_ifp != NULL) { 548 ifp = imo->imo_multicast_ifp; 549 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) 550 if (ia->ia_ifp == ifp) 551 break; 552 if (ia == NULL) 553 return (EADDRNOTAVAIL); 554 } 555 } 556 /* 557 * Don't do pcblookup call here; return interface in plocal_sin 558 * and exit to caller, that will do the lookup. 559 */ 560 *plocal_sin = &ia->ia_addr; 561 562 } 563 return (0); 564 } 565 566 /* 567 * Outer subroutine: 568 * Connect from a socket to a specified address. 569 * Both address and port must be specified in argument sin. 570 * If don't have a local address for this socket yet, 571 * then pick one. 572 */ 573 int 574 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 575 { 576 struct sockaddr_in *if_sin; 577 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 578 struct sockaddr_in sa; 579 struct ucred *cr = td->td_proc ? td->td_proc->p_ucred : NULL; 580 int error; 581 582 if (cr && cr->cr_prison != NULL && in_nullhost(inp->inp_laddr)) { 583 bzero(&sa, sizeof sa); 584 sa.sin_addr.s_addr = htonl(cr->cr_prison->pr_ip); 585 sa.sin_len = sizeof sa; 586 sa.sin_family = AF_INET; 587 error = in_pcbbind(inp, (struct sockaddr *)&sa, td); 588 if (error) 589 return (error); 590 } 591 592 /* Call inner routine to assign local interface address. */ 593 if ((error = in_pcbladdr(inp, nam, &if_sin)) != 0) 594 return (error); 595 596 if (in_pcblookup_hash(inp->inp_cpcbinfo, sin->sin_addr, sin->sin_port, 597 inp->inp_laddr.s_addr ? inp->inp_laddr : if_sin->sin_addr, 598 inp->inp_lport, FALSE, NULL) != NULL) { 599 return (EADDRINUSE); 600 } 601 if (inp->inp_laddr.s_addr == INADDR_ANY) { 602 if (inp->inp_lport == 0) { 603 error = in_pcbbind(inp, (struct sockaddr *)NULL, td); 604 if (error) 605 return (error); 606 } 607 inp->inp_laddr = if_sin->sin_addr; 608 } 609 inp->inp_faddr = sin->sin_addr; 610 inp->inp_fport = sin->sin_port; 611 in_pcbinsconnhash(inp); 612 return (0); 613 } 614 615 void 616 in_pcbdisconnect(inp) 617 struct inpcb *inp; 618 { 619 620 inp->inp_faddr.s_addr = INADDR_ANY; 621 inp->inp_fport = 0; 622 in_pcbremconnhash(inp); 623 if (inp->inp_socket->so_state & SS_NOFDREF) 624 in_pcbdetach(inp); 625 } 626 627 void 628 in_pcbdetach(inp) 629 struct inpcb *inp; 630 { 631 struct socket *so = inp->inp_socket; 632 struct inpcbinfo *ipi = inp->inp_pcbinfo; 633 634 #ifdef IPSEC 635 ipsec4_delete_pcbpolicy(inp); 636 #endif /*IPSEC*/ 637 inp->inp_gencnt = ++ipi->ipi_gencnt; 638 in_pcbremlists(inp); 639 so->so_pcb = 0; 640 sofree(so); 641 if (inp->inp_options) 642 (void)m_free(inp->inp_options); 643 if (inp->inp_route.ro_rt) 644 rtfree(inp->inp_route.ro_rt); 645 ip_freemoptions(inp->inp_moptions); 646 inp->inp_vflag = 0; 647 zfree(ipi->ipi_zone, inp); 648 } 649 650 /* 651 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 652 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 653 * in struct pr_usrreqs, so that protocols can just reference then directly 654 * without the need for a wrapper function. The socket must have a valid 655 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 656 * except through a kernel programming error, so it is acceptable to panic 657 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 658 * because there actually /is/ a programming error somewhere... XXX) 659 */ 660 int 661 in_setsockaddr(so, nam) 662 struct socket *so; 663 struct sockaddr **nam; 664 { 665 int s; 666 struct inpcb *inp; 667 struct sockaddr_in *sin; 668 669 /* 670 * Do the malloc first in case it blocks. 671 */ 672 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 673 M_WAITOK | M_ZERO); 674 sin->sin_family = AF_INET; 675 sin->sin_len = sizeof *sin; 676 677 s = splnet(); 678 inp = sotoinpcb(so); 679 if (!inp) { 680 splx(s); 681 free(sin, M_SONAME); 682 return (ECONNRESET); 683 } 684 sin->sin_port = inp->inp_lport; 685 sin->sin_addr = inp->inp_laddr; 686 splx(s); 687 688 *nam = (struct sockaddr *)sin; 689 return (0); 690 } 691 692 int 693 in_setpeeraddr(so, nam) 694 struct socket *so; 695 struct sockaddr **nam; 696 { 697 int s; 698 struct inpcb *inp; 699 struct sockaddr_in *sin; 700 701 /* 702 * Do the malloc first in case it blocks. 703 */ 704 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 705 M_WAITOK | M_ZERO); 706 sin->sin_family = AF_INET; 707 sin->sin_len = sizeof *sin; 708 709 s = splnet(); 710 inp = sotoinpcb(so); 711 if (!inp) { 712 splx(s); 713 free(sin, M_SONAME); 714 return (ECONNRESET); 715 } 716 sin->sin_port = inp->inp_fport; 717 sin->sin_addr = inp->inp_faddr; 718 splx(s); 719 720 *nam = (struct sockaddr *)sin; 721 return (0); 722 } 723 724 void 725 in_pcbnotifyall(head, faddr, errno, notify) 726 struct inpcbhead *head; 727 struct in_addr faddr; 728 void (*notify) (struct inpcb *, int); 729 { 730 struct inpcb *inp, *ninp; 731 int s; 732 733 /* 734 * note: if INP_PLACEMARKER is set we must ignore the rest of 735 * the structure and skip it. 736 */ 737 s = splnet(); 738 for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { 739 ninp = LIST_NEXT(inp, inp_list); 740 if (inp->inp_flags & INP_PLACEMARKER) 741 continue; 742 #ifdef INET6 743 if (!(inp->inp_vflag & INP_IPV4)) 744 continue; 745 #endif 746 if (inp->inp_faddr.s_addr != faddr.s_addr || 747 inp->inp_socket == NULL) 748 continue; 749 (*notify)(inp, errno); 750 } 751 splx(s); 752 } 753 754 void 755 in_pcbpurgeif0(head, ifp) 756 struct inpcb *head; 757 struct ifnet *ifp; 758 { 759 struct inpcb *inp; 760 struct ip_moptions *imo; 761 int i, gap; 762 763 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) { 764 if (inp->inp_flags & INP_PLACEMARKER) 765 continue; 766 imo = inp->inp_moptions; 767 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { 768 /* 769 * Unselect the outgoing interface if it is being 770 * detached. 771 */ 772 if (imo->imo_multicast_ifp == ifp) 773 imo->imo_multicast_ifp = NULL; 774 775 /* 776 * Drop multicast group membership if we joined 777 * through the interface being detached. 778 */ 779 for (i = 0, gap = 0; i < imo->imo_num_memberships; 780 i++) { 781 if (imo->imo_membership[i]->inm_ifp == ifp) { 782 in_delmulti(imo->imo_membership[i]); 783 gap++; 784 } else if (gap != 0) 785 imo->imo_membership[i - gap] = 786 imo->imo_membership[i]; 787 } 788 imo->imo_num_memberships -= gap; 789 } 790 } 791 } 792 793 /* 794 * Check for alternatives when higher level complains 795 * about service problems. For now, invalidate cached 796 * routing information. If the route was created dynamically 797 * (by a redirect), time to try a default gateway again. 798 */ 799 void 800 in_losing(inp) 801 struct inpcb *inp; 802 { 803 struct rtentry *rt; 804 struct rt_addrinfo info; 805 806 if ((rt = inp->inp_route.ro_rt)) { 807 bzero((caddr_t)&info, sizeof info); 808 info.rti_flags = rt->rt_flags; 809 info.rti_info[RTAX_DST] = rt_key(rt); 810 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 811 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 812 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 813 if (rt->rt_flags & RTF_DYNAMIC) 814 (void) rtrequest1(RTM_DELETE, &info, NULL); 815 inp->inp_route.ro_rt = NULL; 816 rtfree(rt); 817 /* 818 * A new route can be allocated 819 * the next time output is attempted. 820 */ 821 } 822 } 823 824 /* 825 * After a routing change, flush old routing 826 * and allocate a (hopefully) better one. 827 */ 828 void 829 in_rtchange(inp, errno) 830 struct inpcb *inp; 831 int errno; 832 { 833 if (inp->inp_route.ro_rt) { 834 rtfree(inp->inp_route.ro_rt); 835 inp->inp_route.ro_rt = 0; 836 /* 837 * A new route can be allocated the next time 838 * output is attempted. 839 */ 840 } 841 } 842 843 /* 844 * Lookup a PCB based on the local address and port. 845 */ 846 struct inpcb * 847 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) 848 struct inpcbinfo *pcbinfo; 849 struct in_addr laddr; 850 u_int lport_arg; 851 int wild_okay; 852 { 853 struct inpcb *inp; 854 int matchwild = 3, wildcard; 855 u_short lport = lport_arg; 856 857 struct inpcbporthead *porthash; 858 struct inpcbport *phd; 859 struct inpcb *match = NULL; 860 861 /* 862 * Best fit PCB lookup. 863 * 864 * First see if this local port is in use by looking on the 865 * port hash list. 866 */ 867 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, 868 pcbinfo->porthashmask)]; 869 LIST_FOREACH(phd, porthash, phd_hash) { 870 if (phd->phd_port == lport) 871 break; 872 } 873 if (phd != NULL) { 874 /* 875 * Port is in use by one or more PCBs. Look for best 876 * fit. 877 */ 878 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 879 wildcard = 0; 880 #ifdef INET6 881 if ((inp->inp_vflag & INP_IPV4) == 0) 882 continue; 883 #endif 884 if (inp->inp_faddr.s_addr != INADDR_ANY) 885 wildcard++; 886 if (inp->inp_laddr.s_addr != INADDR_ANY) { 887 if (laddr.s_addr == INADDR_ANY) 888 wildcard++; 889 else if (inp->inp_laddr.s_addr != laddr.s_addr) 890 continue; 891 } else { 892 if (laddr.s_addr != INADDR_ANY) 893 wildcard++; 894 } 895 if (wildcard && !wild_okay) 896 continue; 897 if (wildcard < matchwild) { 898 match = inp; 899 matchwild = wildcard; 900 if (matchwild == 0) { 901 break; 902 } 903 } 904 } 905 } 906 return (match); 907 } 908 909 /* 910 * Lookup PCB in hash list. 911 */ 912 struct inpcb * 913 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) 914 struct inpcbinfo *pcbinfo; 915 struct in_addr faddr, laddr; 916 u_int fport_arg, lport_arg; 917 boolean_t wildcard; 918 struct ifnet *ifp; 919 { 920 struct inpcbhead *head; 921 struct inpcb *inp; 922 u_short fport = fport_arg, lport = lport_arg; 923 924 /* 925 * First look for an exact match. 926 */ 927 head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport, 928 laddr.s_addr, lport, pcbinfo->hashmask)]; 929 LIST_FOREACH(inp, head, inp_hash) { 930 #ifdef INET6 931 if (!(inp->inp_vflag & INP_IPV4)) 932 continue; 933 #endif 934 if (in_hosteq(inp->inp_faddr, faddr) && 935 in_hosteq(inp->inp_laddr, laddr) && 936 inp->inp_fport == fport && inp->inp_lport == lport) { 937 /* found */ 938 return (inp); 939 } 940 } 941 942 if (wildcard) { 943 struct inpcb *local_wild = NULL; 944 #ifdef INET6 945 struct inpcb *local_wild_mapped = NULL; 946 #endif 947 struct inpcontainer *ic; 948 struct inpcontainerhead *chead; 949 950 chead = &pcbinfo->wildcardhashbase[ 951 INP_PCBWILDCARDHASH(lport, pcbinfo->wildcardhashmask)]; 952 LIST_FOREACH(ic, chead, ic_list) { 953 inp = ic->ic_inp; 954 #ifdef INET6 955 if (!(inp->inp_vflag & INP_IPV4)) 956 continue; 957 #endif 958 if (inp->inp_lport == lport) { 959 if (ifp && ifp->if_type == IFT_FAITH && 960 !(inp->inp_flags & INP_FAITH)) 961 continue; 962 if (inp->inp_laddr.s_addr == laddr.s_addr) 963 return (inp); 964 if (inp->inp_laddr.s_addr == INADDR_ANY) { 965 #ifdef INET6 966 if (INP_CHECK_SOCKAF(inp->inp_socket, 967 AF_INET6)) 968 local_wild_mapped = inp; 969 else 970 #endif 971 local_wild = inp; 972 } 973 } 974 } 975 #ifdef INET6 976 if (local_wild == NULL) 977 return (local_wild_mapped); 978 #endif 979 return (local_wild); 980 } 981 982 /* 983 * Not found. 984 */ 985 return (NULL); 986 } 987 988 /* 989 * Insert PCB into connection hash table. 990 */ 991 void 992 in_pcbinsconnhash(struct inpcb *inp) 993 { 994 struct inpcbinfo *pcbinfo = inp->inp_cpcbinfo; 995 struct inpcbhead *bucket; 996 u_int32_t hashkey_faddr, hashkey_laddr; 997 998 #ifdef INET6 999 if (inp->inp_vflag & INP_IPV6) { 1000 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */; 1001 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */; 1002 } else { 1003 #endif 1004 hashkey_faddr = inp->inp_faddr.s_addr; 1005 hashkey_laddr = inp->inp_laddr.s_addr; 1006 #ifdef INET6 1007 } 1008 #endif 1009 1010 KASSERT(!(inp->inp_flags & INP_CONNECTED), ("already on hash list")); 1011 inp->inp_flags |= INP_CONNECTED; 1012 1013 /* 1014 * Insert into the connection hash table. 1015 */ 1016 bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr, 1017 inp->inp_fport, hashkey_laddr, inp->inp_lport, pcbinfo->hashmask)]; 1018 LIST_INSERT_HEAD(bucket, inp, inp_hash); 1019 } 1020 1021 /* 1022 * Remove PCB from connection hash table. 1023 */ 1024 void 1025 in_pcbremconnhash(struct inpcb *inp) 1026 { 1027 KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected")); 1028 LIST_REMOVE(inp, inp_hash); 1029 inp->inp_flags &= ~INP_CONNECTED; 1030 } 1031 1032 /* 1033 * Insert PCB into port hash table. 1034 */ 1035 int 1036 in_pcbinsporthash(struct inpcb *inp) 1037 { 1038 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1039 struct inpcbporthead *pcbporthash; 1040 struct inpcbport *phd; 1041 1042 /* 1043 * Insert into the port hash table. 1044 */ 1045 pcbporthash = &pcbinfo->porthashbase[ 1046 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; 1047 1048 /* Go through port list and look for a head for this lport. */ 1049 LIST_FOREACH(phd, pcbporthash, phd_hash) 1050 if (phd->phd_port == inp->inp_lport) 1051 break; 1052 1053 /* If none exists, malloc one and tack it on. */ 1054 if (phd == NULL) { 1055 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), 1056 M_PCB, M_INTWAIT | M_NULLOK); 1057 if (phd == NULL) 1058 return (ENOBUFS); /* XXX */ 1059 phd->phd_port = inp->inp_lport; 1060 LIST_INIT(&phd->phd_pcblist); 1061 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1062 } 1063 1064 inp->inp_phd = phd; 1065 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1066 1067 return (0); 1068 } 1069 1070 void 1071 in_pcbinswildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1072 { 1073 struct inpcontainer *ic; 1074 struct inpcontainerhead *bucket; 1075 1076 bucket = &pcbinfo->wildcardhashbase[ 1077 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1078 1079 ic = malloc(sizeof(struct inpcontainer), M_TEMP, M_INTWAIT); 1080 ic->ic_inp = inp; 1081 LIST_INSERT_HEAD(bucket, ic, ic_list); 1082 } 1083 1084 /* 1085 * Insert PCB into wildcard hash table. 1086 */ 1087 void 1088 in_pcbinswildcardhash(struct inpcb *inp) 1089 { 1090 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1091 1092 KKASSERT(pcbinfo != NULL); 1093 1094 in_pcbinswildcardhash_oncpu(inp, pcbinfo); 1095 inp->inp_flags |= INP_WILDCARD; 1096 } 1097 1098 void 1099 in_pcbremwildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1100 { 1101 struct inpcontainer *ic; 1102 struct inpcontainerhead *head; 1103 1104 /* find bucket */ 1105 head = &pcbinfo->wildcardhashbase[ 1106 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1107 1108 LIST_FOREACH(ic, head, ic_list) { 1109 if (ic->ic_inp == inp) 1110 goto found; 1111 } 1112 return; /* not found! */ 1113 1114 found: 1115 LIST_REMOVE(ic, ic_list); /* remove container from bucket chain */ 1116 free(ic, M_TEMP); /* deallocate container */ 1117 } 1118 1119 /* 1120 * Remove PCB from wildcard hash table. 1121 */ 1122 void 1123 in_pcbremwildcardhash(struct inpcb *inp) 1124 { 1125 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1126 1127 KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard")); 1128 in_pcbremwildcardhash_oncpu(inp, pcbinfo); 1129 inp->inp_flags &= ~INP_WILDCARD; 1130 } 1131 1132 /* 1133 * Remove PCB from various lists. 1134 */ 1135 void 1136 in_pcbremlists(inp) 1137 struct inpcb *inp; 1138 { 1139 if (inp->inp_lport) { 1140 struct inpcbport *phd = inp->inp_phd; 1141 1142 LIST_REMOVE(inp, inp_portlist); 1143 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1144 LIST_REMOVE(phd, phd_hash); 1145 free(phd, M_PCB); 1146 } 1147 } 1148 if (inp->inp_flags & INP_WILDCARD) { 1149 in_pcbremwildcardhash(inp); 1150 } else if (inp->inp_flags & INP_CONNECTED) { 1151 in_pcbremconnhash(inp); 1152 } 1153 LIST_REMOVE(inp, inp_list); 1154 inp->inp_pcbinfo->ipi_count--; 1155 } 1156 1157 int 1158 prison_xinpcb(struct thread *td, struct inpcb *inp) 1159 { 1160 struct ucred *cr; 1161 1162 if (td->td_proc == NULL) 1163 return (0); 1164 cr = td->td_proc->p_ucred; 1165 if (cr->cr_prison == NULL) 1166 return (0); 1167 if (ntohl(inp->inp_laddr.s_addr) == cr->cr_prison->pr_ip) 1168 return (0); 1169 return (1); 1170 } 1171 1172 int 1173 in_pcblist_global(SYSCTL_HANDLER_ARGS) 1174 { 1175 struct inpcbinfo *pcbinfo = arg1; 1176 struct inpcb *inp, *marker; 1177 struct xinpcb xi; 1178 int error, i, n; 1179 inp_gen_t gencnt; 1180 1181 /* 1182 * The process of preparing the TCB list is too time-consuming and 1183 * resource-intensive to repeat twice on every request. 1184 */ 1185 if (req->oldptr == 0) { 1186 n = pcbinfo->ipi_count; 1187 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1188 return 0; 1189 } 1190 1191 if (req->newptr != 0) 1192 return EPERM; 1193 1194 /* 1195 * OK, now we're committed to doing something. 1196 */ 1197 gencnt = pcbinfo->ipi_gencnt; 1198 n = pcbinfo->ipi_count; 1199 1200 marker = malloc(sizeof(struct inpcb), M_TEMP, M_WAITOK|M_ZERO); 1201 marker->inp_flags |= INP_PLACEMARKER; 1202 1203 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 1204 i = 0; 1205 while ((inp = LIST_NEXT(marker, inp_list)) != NULL && i < n) { 1206 LIST_REMOVE(marker, inp_list); 1207 LIST_INSERT_AFTER(inp, marker, inp_list); 1208 1209 if (inp->inp_flags & INP_PLACEMARKER) 1210 continue; 1211 if (inp->inp_gencnt > gencnt) 1212 continue; 1213 if (prison_xinpcb(req->td, inp)) 1214 continue; 1215 xi.xi_len = sizeof xi; 1216 bcopy(inp, &xi.xi_inp, sizeof *inp); 1217 if (inp->inp_socket) 1218 sotoxsocket(inp->inp_socket, &xi.xi_socket); 1219 if ((error = SYSCTL_OUT(req, &xi, sizeof xi)) != 0) 1220 break; 1221 ++i; 1222 } 1223 LIST_REMOVE(marker, inp_list); 1224 if (error == 0 && i < n) { 1225 bzero(&xi, sizeof(xi)); 1226 xi.xi_len = sizeof(xi); 1227 while (i < n) { 1228 error = SYSCTL_OUT(req, &xi, sizeof(xi)); 1229 ++i; 1230 } 1231 } 1232 free(marker, M_TEMP); 1233 return(error); 1234 } 1235