1 /* 2 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 3 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 4 * 5 * This code is derived from software contributed to The DragonFly Project 6 * by Jeffrey M. Hsu. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of The DragonFly Project nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific, prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Copyright (c) 2004 Jeffrey M. Hsu. All rights reserved. 36 * 37 * License terms: all terms for the DragonFly license above plus the following: 38 * 39 * 4. All advertising materials mentioning features or use of this software 40 * must display the following acknowledgement: 41 * 42 * This product includes software developed by Jeffrey M. Hsu 43 * for the DragonFly Project. 44 * 45 * This requirement may be waived with permission from Jeffrey Hsu. 46 * This requirement will sunset and may be removed on July 8 2005, 47 * after which the standard DragonFly license (as shown above) will 48 * apply. 49 */ 50 51 /* 52 * Copyright (c) 1982, 1986, 1991, 1993, 1995 53 * The Regents of the University of California. All rights reserved. 54 * 55 * Redistribution and use in source and binary forms, with or without 56 * modification, are permitted provided that the following conditions 57 * are met: 58 * 1. Redistributions of source code must retain the above copyright 59 * notice, this list of conditions and the following disclaimer. 60 * 2. Redistributions in binary form must reproduce the above copyright 61 * notice, this list of conditions and the following disclaimer in the 62 * documentation and/or other materials provided with the distribution. 63 * 3. All advertising materials mentioning features or use of this software 64 * must display the following acknowledgement: 65 * This product includes software developed by the University of 66 * California, Berkeley and its contributors. 67 * 4. Neither the name of the University nor the names of its contributors 68 * may be used to endorse or promote products derived from this software 69 * without specific prior written permission. 70 * 71 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 72 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 73 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 74 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 75 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 76 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 77 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 78 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 79 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 80 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 81 * SUCH DAMAGE. 82 * 83 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 84 * $FreeBSD: src/sys/netinet/in_pcb.c,v 1.59.2.27 2004/01/02 04:06:42 ambrisko Exp $ 85 * $DragonFly: src/sys/netinet/in_pcb.c,v 1.31 2004/12/30 05:14:30 hsu Exp $ 86 */ 87 88 #include "opt_ipsec.h" 89 #include "opt_inet6.h" 90 91 #include <sys/param.h> 92 #include <sys/systm.h> 93 #include <sys/malloc.h> 94 #include <sys/mbuf.h> 95 #include <sys/domain.h> 96 #include <sys/protosw.h> 97 #include <sys/socket.h> 98 #include <sys/socketvar.h> 99 #include <sys/proc.h> 100 #include <sys/jail.h> 101 #include <sys/kernel.h> 102 #include <sys/sysctl.h> 103 104 #include <machine/limits.h> 105 106 #include <vm/vm_zone.h> 107 108 #include <net/if.h> 109 #include <net/if_types.h> 110 #include <net/route.h> 111 112 #include <netinet/in.h> 113 #include <netinet/in_pcb.h> 114 #include <netinet/in_var.h> 115 #include <netinet/ip_var.h> 116 #ifdef INET6 117 #include <netinet/ip6.h> 118 #include <netinet6/ip6_var.h> 119 #endif /* INET6 */ 120 121 #ifdef IPSEC 122 #include <netinet6/ipsec.h> 123 #include <netproto/key/key.h> 124 #endif 125 126 #ifdef FAST_IPSEC 127 #if defined(IPSEC) || defined(IPSEC_ESP) 128 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 129 #endif 130 131 #include <netproto/ipsec/ipsec.h> 132 #include <netproto/ipsec/key.h> 133 #define IPSEC 134 #endif /* FAST_IPSEC */ 135 136 struct in_addr zeroin_addr; 137 138 /* 139 * These configure the range of local port addresses assigned to 140 * "unspecified" outgoing connections/packets/whatever. 141 */ 142 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 143 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 144 145 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 146 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 147 148 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 149 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 150 151 static __inline void 152 RANGECHK(int var, int min, int max) 153 { 154 if (var < min) 155 var = min; 156 else if (var > max) 157 var = max; 158 } 159 160 static int 161 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 162 { 163 int error; 164 165 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); 166 if (!error) { 167 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 168 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 169 170 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 171 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 172 173 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 174 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 175 } 176 return (error); 177 } 178 179 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 180 181 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 182 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 183 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 184 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 185 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 186 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 187 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 188 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 189 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 190 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 191 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 192 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 193 194 /* 195 * in_pcb.c: manage the Protocol Control Blocks. 196 * 197 * NOTE: It is assumed that most of these functions will be called at 198 * splnet(). XXX - There are, unfortunately, a few exceptions to this 199 * rule that should be fixed. 200 * 201 * NOTE: The caller should initialize the cpu field to the cpu running the 202 * protocol stack associated with this inpcbinfo. 203 */ 204 205 void 206 in_pcbinfo_init(struct inpcbinfo *pcbinfo) 207 { 208 LIST_INIT(&pcbinfo->pcblisthead); 209 pcbinfo->cpu = -1; 210 } 211 212 /* 213 * Allocate a PCB and associate it with the socket. 214 */ 215 int 216 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) 217 { 218 struct inpcb *inp; 219 #ifdef IPSEC 220 int error; 221 #endif 222 223 inp = zalloc(pcbinfo->ipi_zone); 224 if (inp == NULL) 225 return (ENOBUFS); 226 bzero(inp, sizeof *inp); 227 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 228 inp->inp_pcbinfo = inp->inp_cpcbinfo = pcbinfo; 229 inp->inp_socket = so; 230 #ifdef IPSEC 231 error = ipsec_init_policy(so, &inp->inp_sp); 232 if (error != 0) { 233 zfree(pcbinfo->ipi_zone, inp); 234 return (error); 235 } 236 #endif 237 #ifdef INET6 238 if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only) 239 inp->inp_flags |= IN6P_IPV6_V6ONLY; 240 if (ip6_auto_flowlabel) 241 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 242 #endif 243 so->so_pcb = inp; 244 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, inp, inp_list); 245 pcbinfo->ipi_count++; 246 return (0); 247 } 248 249 int 250 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 251 { 252 struct socket *so = inp->inp_socket; 253 struct proc *p = td->td_proc; 254 unsigned short *lastport; 255 struct sockaddr_in *sin; 256 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 257 u_short lport = 0; 258 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 259 int error, prison = 0; 260 261 KKASSERT(p); 262 263 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ 264 return (EADDRNOTAVAIL); 265 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 266 return (EINVAL); /* already bound */ 267 if (!(so->so_options & (SO_REUSEADDR|SO_REUSEPORT))) 268 wild = 1; /* neither SO_REUSEADDR nor SO_REUSEPORT is set */ 269 if (nam != NULL) { 270 sin = (struct sockaddr_in *)nam; 271 if (nam->sa_len != sizeof *sin) 272 return (EINVAL); 273 #ifdef notdef 274 /* 275 * We should check the family, but old programs 276 * incorrectly fail to initialize it. 277 */ 278 if (sin->sin_family != AF_INET) 279 return (EAFNOSUPPORT); 280 #endif 281 if (sin->sin_addr.s_addr != INADDR_ANY && 282 prison_ip(td, 0, &sin->sin_addr.s_addr)) 283 return (EINVAL); 284 lport = sin->sin_port; 285 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 286 /* 287 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 288 * allow complete duplication of binding if 289 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 290 * and a multicast address is bound on both 291 * new and duplicated sockets. 292 */ 293 if (so->so_options & SO_REUSEADDR) 294 reuseport = SO_REUSEADDR | SO_REUSEPORT; 295 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 296 sin->sin_port = 0; /* yech... */ 297 bzero(&sin->sin_zero, sizeof sin->sin_zero); 298 if (ifa_ifwithaddr((struct sockaddr *)sin) == NULL) 299 return (EADDRNOTAVAIL); 300 } 301 if (lport != 0) { 302 struct inpcb *t; 303 304 /* GROSS */ 305 if (ntohs(lport) < IPPORT_RESERVED && 306 p && suser_cred(p->p_ucred, PRISON_ROOT)) 307 return (EACCES); 308 if (p && p->p_ucred->cr_prison) 309 prison = 1; 310 if (so->so_cred->cr_uid != 0 && 311 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 312 t = in_pcblookup_local(inp->inp_pcbinfo, 313 sin->sin_addr, lport, 314 prison ? 0 : INPLOOKUP_WILDCARD); 315 if (t && 316 (!in_nullhost(sin->sin_addr) || 317 !in_nullhost(t->inp_laddr) || 318 (t->inp_socket->so_options & 319 SO_REUSEPORT) == 0) && 320 (so->so_cred->cr_uid != 321 t->inp_socket->so_cred->cr_uid)) { 322 #ifdef INET6 323 if (!in_nullhost(sin->sin_addr) || 324 !in_nullhost(t->inp_laddr) || 325 INP_SOCKAF(so) == 326 INP_SOCKAF(t->inp_socket)) 327 #endif 328 return (EADDRINUSE); 329 } 330 } 331 if (prison && prison_ip(td, 0, &sin->sin_addr.s_addr)) 332 return (EADDRNOTAVAIL); 333 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 334 lport, prison ? 0 : wild); 335 if (t && !(reuseport & t->inp_socket->so_options)) { 336 #ifdef INET6 337 if (!in_nullhost(sin->sin_addr) || 338 !in_nullhost(t->inp_laddr) || 339 INP_SOCKAF(so) == INP_SOCKAF(t->inp_socket)) 340 #endif 341 return (EADDRINUSE); 342 } 343 } 344 inp->inp_laddr = sin->sin_addr; 345 } 346 if (lport == 0) { 347 ushort first, last; 348 int count; 349 350 if (inp->inp_laddr.s_addr != INADDR_ANY && 351 prison_ip(td, 0, &inp->inp_laddr.s_addr )) { 352 inp->inp_laddr.s_addr = INADDR_ANY; 353 return (EINVAL); 354 } 355 inp->inp_flags |= INP_ANONPORT; 356 357 if (inp->inp_flags & INP_HIGHPORT) { 358 first = ipport_hifirstauto; /* sysctl */ 359 last = ipport_hilastauto; 360 lastport = &pcbinfo->lasthi; 361 } else if (inp->inp_flags & INP_LOWPORT) { 362 if (p && 363 (error = suser_cred(p->p_ucred, PRISON_ROOT))) { 364 inp->inp_laddr.s_addr = INADDR_ANY; 365 return (error); 366 } 367 first = ipport_lowfirstauto; /* 1023 */ 368 last = ipport_lowlastauto; /* 600 */ 369 lastport = &pcbinfo->lastlow; 370 } else { 371 first = ipport_firstauto; /* sysctl */ 372 last = ipport_lastauto; 373 lastport = &pcbinfo->lastport; 374 } 375 /* 376 * Simple check to ensure all ports are not used up causing 377 * a deadlock here. 378 * 379 * We split the two cases (up and down) so that the direction 380 * is not being tested on each round of the loop. 381 */ 382 if (first > last) { 383 /* 384 * counting down 385 */ 386 count = first - last; 387 388 do { 389 if (count-- < 0) { /* completely used? */ 390 inp->inp_laddr.s_addr = INADDR_ANY; 391 return (EADDRNOTAVAIL); 392 } 393 --*lastport; 394 if (*lastport > first || *lastport < last) 395 *lastport = first; 396 lport = htons(*lastport); 397 } while (in_pcblookup_local(pcbinfo, 398 inp->inp_laddr, lport, wild)); 399 } else { 400 /* 401 * counting up 402 */ 403 count = last - first; 404 405 do { 406 if (count-- < 0) { /* completely used? */ 407 inp->inp_laddr.s_addr = INADDR_ANY; 408 return (EADDRNOTAVAIL); 409 } 410 ++*lastport; 411 if (*lastport < first || *lastport > last) 412 *lastport = first; 413 lport = htons(*lastport); 414 } while (in_pcblookup_local(pcbinfo, 415 inp->inp_laddr, lport, wild)); 416 } 417 } 418 inp->inp_lport = lport; 419 if (prison_ip(td, 0, &inp->inp_laddr.s_addr)) { 420 inp->inp_laddr.s_addr = INADDR_ANY; 421 inp->inp_lport = 0; 422 return (EINVAL); 423 } 424 if (in_pcbinsporthash(inp) != 0) { 425 inp->inp_laddr.s_addr = INADDR_ANY; 426 inp->inp_lport = 0; 427 return (EAGAIN); 428 } 429 return (0); 430 } 431 432 /* 433 * Transform old in_pcbconnect() into an inner subroutine for new 434 * in_pcbconnect(): Do some validity-checking on the remote 435 * address (in mbuf 'nam') and then determine local host address 436 * (i.e., which interface) to use to access that remote host. 437 * 438 * This preserves definition of in_pcbconnect(), while supporting a 439 * slightly different version for T/TCP. (This is more than 440 * a bit of a kludge, but cleaning up the internal interfaces would 441 * have forced minor changes in every protocol). 442 */ 443 int 444 in_pcbladdr(inp, nam, plocal_sin) 445 struct inpcb *inp; 446 struct sockaddr *nam; 447 struct sockaddr_in **plocal_sin; 448 { 449 struct in_ifaddr *ia; 450 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 451 452 if (nam->sa_len != sizeof *sin) 453 return (EINVAL); 454 if (sin->sin_family != AF_INET) 455 return (EAFNOSUPPORT); 456 if (sin->sin_port == 0) 457 return (EADDRNOTAVAIL); 458 if (!TAILQ_EMPTY(&in_ifaddrhead)) { 459 ia = TAILQ_FIRST(&in_ifaddrhead); 460 /* 461 * If the destination address is INADDR_ANY, 462 * use the primary local address. 463 * If the supplied address is INADDR_BROADCAST, 464 * and the primary interface supports broadcast, 465 * choose the broadcast address for that interface. 466 */ 467 if (sin->sin_addr.s_addr == INADDR_ANY) 468 sin->sin_addr = IA_SIN(ia)->sin_addr; 469 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && 470 (ia->ia_ifp->if_flags & IFF_BROADCAST)) 471 sin->sin_addr = satosin(&ia->ia_broadaddr)->sin_addr; 472 } 473 if (inp->inp_laddr.s_addr == INADDR_ANY) { 474 struct route *ro; 475 476 ia = (struct in_ifaddr *)NULL; 477 /* 478 * If route is known or can be allocated now, 479 * our src addr is taken from the i/f, else punt. 480 * Note that we should check the address family of the cached 481 * destination, in case of sharing the cache with IPv6. 482 */ 483 ro = &inp->inp_route; 484 if (ro->ro_rt && 485 (!(ro->ro_rt->rt_flags & RTF_UP) || 486 ro->ro_dst.sa_family != AF_INET || 487 satosin(&ro->ro_dst)->sin_addr.s_addr != 488 sin->sin_addr.s_addr || 489 inp->inp_socket->so_options & SO_DONTROUTE)) { 490 RTFREE(ro->ro_rt); 491 ro->ro_rt = (struct rtentry *)NULL; 492 } 493 if (!(inp->inp_socket->so_options & SO_DONTROUTE) && /*XXX*/ 494 (ro->ro_rt == (struct rtentry *)NULL || 495 ro->ro_rt->rt_ifp == (struct ifnet *)NULL)) { 496 /* No route yet, so try to acquire one */ 497 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 498 ro->ro_dst.sa_family = AF_INET; 499 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 500 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 501 sin->sin_addr; 502 rtalloc(ro); 503 } 504 /* 505 * If we found a route, use the address 506 * corresponding to the outgoing interface 507 * unless it is the loopback (in case a route 508 * to our address on another net goes to loopback). 509 */ 510 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 511 ia = ifatoia(ro->ro_rt->rt_ifa); 512 if (ia == NULL) { 513 u_short fport = sin->sin_port; 514 515 sin->sin_port = 0; 516 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 517 if (ia == NULL) 518 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 519 sin->sin_port = fport; 520 if (ia == NULL) 521 ia = TAILQ_FIRST(&in_ifaddrhead); 522 if (ia == NULL) 523 return (EADDRNOTAVAIL); 524 } 525 /* 526 * If the destination address is multicast and an outgoing 527 * interface has been set as a multicast option, use the 528 * address of that interface as our source address. 529 */ 530 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 531 inp->inp_moptions != NULL) { 532 struct ip_moptions *imo; 533 struct ifnet *ifp; 534 535 imo = inp->inp_moptions; 536 if (imo->imo_multicast_ifp != NULL) { 537 ifp = imo->imo_multicast_ifp; 538 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) 539 if (ia->ia_ifp == ifp) 540 break; 541 if (ia == NULL) 542 return (EADDRNOTAVAIL); 543 } 544 } 545 /* 546 * Don't do pcblookup call here; return interface in plocal_sin 547 * and exit to caller, that will do the lookup. 548 */ 549 *plocal_sin = &ia->ia_addr; 550 551 } 552 return (0); 553 } 554 555 /* 556 * Outer subroutine: 557 * Connect from a socket to a specified address. 558 * Both address and port must be specified in argument sin. 559 * If don't have a local address for this socket yet, 560 * then pick one. 561 */ 562 int 563 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 564 { 565 struct sockaddr_in *if_sin; 566 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 567 struct sockaddr_in sa; 568 struct ucred *cr = td->td_proc ? td->td_proc->p_ucred : NULL; 569 int error; 570 571 if (cr && cr->cr_prison != NULL && in_nullhost(inp->inp_laddr)) { 572 bzero(&sa, sizeof sa); 573 sa.sin_addr.s_addr = htonl(cr->cr_prison->pr_ip); 574 sa.sin_len = sizeof sa; 575 sa.sin_family = AF_INET; 576 error = in_pcbbind(inp, (struct sockaddr *)&sa, td); 577 if (error) 578 return (error); 579 } 580 581 /* Call inner routine to assign local interface address. */ 582 if ((error = in_pcbladdr(inp, nam, &if_sin)) != 0) 583 return (error); 584 585 if (in_pcblookup_hash(inp->inp_cpcbinfo, sin->sin_addr, sin->sin_port, 586 inp->inp_laddr.s_addr ? inp->inp_laddr : if_sin->sin_addr, 587 inp->inp_lport, FALSE, NULL) != NULL) { 588 return (EADDRINUSE); 589 } 590 if (inp->inp_laddr.s_addr == INADDR_ANY) { 591 if (inp->inp_lport == 0) { 592 error = in_pcbbind(inp, (struct sockaddr *)NULL, td); 593 if (error) 594 return (error); 595 } 596 inp->inp_laddr = if_sin->sin_addr; 597 } 598 inp->inp_faddr = sin->sin_addr; 599 inp->inp_fport = sin->sin_port; 600 in_pcbinsconnhash(inp); 601 return (0); 602 } 603 604 void 605 in_pcbdisconnect(inp) 606 struct inpcb *inp; 607 { 608 609 inp->inp_faddr.s_addr = INADDR_ANY; 610 inp->inp_fport = 0; 611 in_pcbremconnhash(inp); 612 if (inp->inp_socket->so_state & SS_NOFDREF) 613 in_pcbdetach(inp); 614 } 615 616 void 617 in_pcbdetach(inp) 618 struct inpcb *inp; 619 { 620 struct socket *so = inp->inp_socket; 621 struct inpcbinfo *ipi = inp->inp_pcbinfo; 622 623 #ifdef IPSEC 624 ipsec4_delete_pcbpolicy(inp); 625 #endif /*IPSEC*/ 626 inp->inp_gencnt = ++ipi->ipi_gencnt; 627 in_pcbremlists(inp); 628 so->so_pcb = 0; 629 sofree(so); 630 if (inp->inp_options) 631 m_free(inp->inp_options); 632 if (inp->inp_route.ro_rt) 633 rtfree(inp->inp_route.ro_rt); 634 ip_freemoptions(inp->inp_moptions); 635 inp->inp_vflag = 0; 636 zfree(ipi->ipi_zone, inp); 637 } 638 639 /* 640 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 641 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 642 * in struct pr_usrreqs, so that protocols can just reference then directly 643 * without the need for a wrapper function. The socket must have a valid 644 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 645 * except through a kernel programming error, so it is acceptable to panic 646 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 647 * because there actually /is/ a programming error somewhere... XXX) 648 */ 649 int 650 in_setsockaddr(so, nam) 651 struct socket *so; 652 struct sockaddr **nam; 653 { 654 int s; 655 struct inpcb *inp; 656 struct sockaddr_in *sin; 657 658 /* 659 * Do the malloc first in case it blocks. 660 */ 661 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 662 M_WAITOK | M_ZERO); 663 sin->sin_family = AF_INET; 664 sin->sin_len = sizeof *sin; 665 666 s = splnet(); 667 inp = sotoinpcb(so); 668 if (!inp) { 669 splx(s); 670 free(sin, M_SONAME); 671 return (ECONNRESET); 672 } 673 sin->sin_port = inp->inp_lport; 674 sin->sin_addr = inp->inp_laddr; 675 splx(s); 676 677 *nam = (struct sockaddr *)sin; 678 return (0); 679 } 680 681 int 682 in_setpeeraddr(so, nam) 683 struct socket *so; 684 struct sockaddr **nam; 685 { 686 int s; 687 struct inpcb *inp; 688 struct sockaddr_in *sin; 689 690 /* 691 * Do the malloc first in case it blocks. 692 */ 693 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 694 M_WAITOK | M_ZERO); 695 sin->sin_family = AF_INET; 696 sin->sin_len = sizeof *sin; 697 698 s = splnet(); 699 inp = sotoinpcb(so); 700 if (!inp) { 701 splx(s); 702 free(sin, M_SONAME); 703 return (ECONNRESET); 704 } 705 sin->sin_port = inp->inp_fport; 706 sin->sin_addr = inp->inp_faddr; 707 splx(s); 708 709 *nam = (struct sockaddr *)sin; 710 return (0); 711 } 712 713 void 714 in_pcbnotifyall(head, faddr, errno, notify) 715 struct inpcbhead *head; 716 struct in_addr faddr; 717 void (*notify) (struct inpcb *, int); 718 { 719 struct inpcb *inp, *ninp; 720 int s; 721 722 /* 723 * note: if INP_PLACEMARKER is set we must ignore the rest of 724 * the structure and skip it. 725 */ 726 s = splnet(); 727 for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { 728 ninp = LIST_NEXT(inp, inp_list); 729 if (inp->inp_flags & INP_PLACEMARKER) 730 continue; 731 #ifdef INET6 732 if (!(inp->inp_vflag & INP_IPV4)) 733 continue; 734 #endif 735 if (inp->inp_faddr.s_addr != faddr.s_addr || 736 inp->inp_socket == NULL) 737 continue; 738 (*notify)(inp, errno); 739 } 740 splx(s); 741 } 742 743 void 744 in_pcbpurgeif0(head, ifp) 745 struct inpcb *head; 746 struct ifnet *ifp; 747 { 748 struct inpcb *inp; 749 struct ip_moptions *imo; 750 int i, gap; 751 752 for (inp = head; inp != NULL; inp = LIST_NEXT(inp, inp_list)) { 753 if (inp->inp_flags & INP_PLACEMARKER) 754 continue; 755 imo = inp->inp_moptions; 756 if ((inp->inp_vflag & INP_IPV4) && imo != NULL) { 757 /* 758 * Unselect the outgoing interface if it is being 759 * detached. 760 */ 761 if (imo->imo_multicast_ifp == ifp) 762 imo->imo_multicast_ifp = NULL; 763 764 /* 765 * Drop multicast group membership if we joined 766 * through the interface being detached. 767 */ 768 for (i = 0, gap = 0; i < imo->imo_num_memberships; 769 i++) { 770 if (imo->imo_membership[i]->inm_ifp == ifp) { 771 in_delmulti(imo->imo_membership[i]); 772 gap++; 773 } else if (gap != 0) 774 imo->imo_membership[i - gap] = 775 imo->imo_membership[i]; 776 } 777 imo->imo_num_memberships -= gap; 778 } 779 } 780 } 781 782 /* 783 * Check for alternatives when higher level complains 784 * about service problems. For now, invalidate cached 785 * routing information. If the route was created dynamically 786 * (by a redirect), time to try a default gateway again. 787 */ 788 void 789 in_losing(inp) 790 struct inpcb *inp; 791 { 792 struct rtentry *rt; 793 struct rt_addrinfo info; 794 795 if ((rt = inp->inp_route.ro_rt)) { 796 bzero(&info, sizeof info); 797 info.rti_flags = rt->rt_flags; 798 info.rti_info[RTAX_DST] = rt_key(rt); 799 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 800 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 801 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 802 if (rt->rt_flags & RTF_DYNAMIC) 803 rtrequest1(RTM_DELETE, &info, NULL); 804 inp->inp_route.ro_rt = NULL; 805 rtfree(rt); 806 /* 807 * A new route can be allocated 808 * the next time output is attempted. 809 */ 810 } 811 } 812 813 /* 814 * After a routing change, flush old routing 815 * and allocate a (hopefully) better one. 816 */ 817 void 818 in_rtchange(inp, errno) 819 struct inpcb *inp; 820 int errno; 821 { 822 if (inp->inp_route.ro_rt) { 823 rtfree(inp->inp_route.ro_rt); 824 inp->inp_route.ro_rt = 0; 825 /* 826 * A new route can be allocated the next time 827 * output is attempted. 828 */ 829 } 830 } 831 832 /* 833 * Lookup a PCB based on the local address and port. 834 */ 835 struct inpcb * 836 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) 837 struct inpcbinfo *pcbinfo; 838 struct in_addr laddr; 839 u_int lport_arg; 840 int wild_okay; 841 { 842 struct inpcb *inp; 843 int matchwild = 3, wildcard; 844 u_short lport = lport_arg; 845 846 struct inpcbporthead *porthash; 847 struct inpcbport *phd; 848 struct inpcb *match = NULL; 849 850 /* 851 * Best fit PCB lookup. 852 * 853 * First see if this local port is in use by looking on the 854 * port hash list. 855 */ 856 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, 857 pcbinfo->porthashmask)]; 858 LIST_FOREACH(phd, porthash, phd_hash) { 859 if (phd->phd_port == lport) 860 break; 861 } 862 if (phd != NULL) { 863 /* 864 * Port is in use by one or more PCBs. Look for best 865 * fit. 866 */ 867 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 868 wildcard = 0; 869 #ifdef INET6 870 if ((inp->inp_vflag & INP_IPV4) == 0) 871 continue; 872 #endif 873 if (inp->inp_faddr.s_addr != INADDR_ANY) 874 wildcard++; 875 if (inp->inp_laddr.s_addr != INADDR_ANY) { 876 if (laddr.s_addr == INADDR_ANY) 877 wildcard++; 878 else if (inp->inp_laddr.s_addr != laddr.s_addr) 879 continue; 880 } else { 881 if (laddr.s_addr != INADDR_ANY) 882 wildcard++; 883 } 884 if (wildcard && !wild_okay) 885 continue; 886 if (wildcard < matchwild) { 887 match = inp; 888 matchwild = wildcard; 889 if (matchwild == 0) { 890 break; 891 } 892 } 893 } 894 } 895 return (match); 896 } 897 898 /* 899 * Lookup PCB in hash list. 900 */ 901 struct inpcb * 902 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, ifp) 903 struct inpcbinfo *pcbinfo; 904 struct in_addr faddr, laddr; 905 u_int fport_arg, lport_arg; 906 boolean_t wildcard; 907 struct ifnet *ifp; 908 { 909 struct inpcbhead *head; 910 struct inpcb *inp; 911 u_short fport = fport_arg, lport = lport_arg; 912 913 /* 914 * First look for an exact match. 915 */ 916 head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr.s_addr, fport, 917 laddr.s_addr, lport, pcbinfo->hashmask)]; 918 LIST_FOREACH(inp, head, inp_hash) { 919 #ifdef INET6 920 if (!(inp->inp_vflag & INP_IPV4)) 921 continue; 922 #endif 923 if (in_hosteq(inp->inp_faddr, faddr) && 924 in_hosteq(inp->inp_laddr, laddr) && 925 inp->inp_fport == fport && inp->inp_lport == lport) { 926 /* found */ 927 return (inp); 928 } 929 } 930 931 if (wildcard) { 932 struct inpcb *local_wild = NULL; 933 #ifdef INET6 934 struct inpcb *local_wild_mapped = NULL; 935 #endif 936 struct inpcontainer *ic; 937 struct inpcontainerhead *chead; 938 939 chead = &pcbinfo->wildcardhashbase[ 940 INP_PCBWILDCARDHASH(lport, pcbinfo->wildcardhashmask)]; 941 LIST_FOREACH(ic, chead, ic_list) { 942 inp = ic->ic_inp; 943 #ifdef INET6 944 if (!(inp->inp_vflag & INP_IPV4)) 945 continue; 946 #endif 947 if (inp->inp_lport == lport) { 948 if (ifp && ifp->if_type == IFT_FAITH && 949 !(inp->inp_flags & INP_FAITH)) 950 continue; 951 if (inp->inp_laddr.s_addr == laddr.s_addr) 952 return (inp); 953 if (inp->inp_laddr.s_addr == INADDR_ANY) { 954 #ifdef INET6 955 if (INP_CHECK_SOCKAF(inp->inp_socket, 956 AF_INET6)) 957 local_wild_mapped = inp; 958 else 959 #endif 960 local_wild = inp; 961 } 962 } 963 } 964 #ifdef INET6 965 if (local_wild == NULL) 966 return (local_wild_mapped); 967 #endif 968 return (local_wild); 969 } 970 971 /* 972 * Not found. 973 */ 974 return (NULL); 975 } 976 977 /* 978 * Insert PCB into connection hash table. 979 */ 980 void 981 in_pcbinsconnhash(struct inpcb *inp) 982 { 983 struct inpcbinfo *pcbinfo = inp->inp_cpcbinfo; 984 struct inpcbhead *bucket; 985 u_int32_t hashkey_faddr, hashkey_laddr; 986 987 #ifdef INET6 988 if (inp->inp_vflag & INP_IPV6) { 989 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX JH */; 990 hashkey_laddr = inp->in6p_laddr.s6_addr32[3] /* XXX JH */; 991 } else { 992 #endif 993 hashkey_faddr = inp->inp_faddr.s_addr; 994 hashkey_laddr = inp->inp_laddr.s_addr; 995 #ifdef INET6 996 } 997 #endif 998 999 KASSERT(!(inp->inp_flags & INP_CONNECTED), ("already on hash list")); 1000 inp->inp_flags |= INP_CONNECTED; 1001 1002 /* 1003 * Insert into the connection hash table. 1004 */ 1005 bucket = &pcbinfo->hashbase[INP_PCBCONNHASH(hashkey_faddr, 1006 inp->inp_fport, hashkey_laddr, inp->inp_lport, pcbinfo->hashmask)]; 1007 LIST_INSERT_HEAD(bucket, inp, inp_hash); 1008 } 1009 1010 /* 1011 * Remove PCB from connection hash table. 1012 */ 1013 void 1014 in_pcbremconnhash(struct inpcb *inp) 1015 { 1016 KASSERT(inp->inp_flags & INP_CONNECTED, ("inp not connected")); 1017 LIST_REMOVE(inp, inp_hash); 1018 inp->inp_flags &= ~INP_CONNECTED; 1019 } 1020 1021 /* 1022 * Insert PCB into port hash table. 1023 */ 1024 int 1025 in_pcbinsporthash(struct inpcb *inp) 1026 { 1027 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1028 struct inpcbporthead *pcbporthash; 1029 struct inpcbport *phd; 1030 1031 /* 1032 * Insert into the port hash table. 1033 */ 1034 pcbporthash = &pcbinfo->porthashbase[ 1035 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; 1036 1037 /* Go through port list and look for a head for this lport. */ 1038 LIST_FOREACH(phd, pcbporthash, phd_hash) 1039 if (phd->phd_port == inp->inp_lport) 1040 break; 1041 1042 /* If none exists, malloc one and tack it on. */ 1043 if (phd == NULL) { 1044 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), 1045 M_PCB, M_INTWAIT | M_NULLOK); 1046 if (phd == NULL) 1047 return (ENOBUFS); /* XXX */ 1048 phd->phd_port = inp->inp_lport; 1049 LIST_INIT(&phd->phd_pcblist); 1050 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1051 } 1052 1053 inp->inp_phd = phd; 1054 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1055 1056 return (0); 1057 } 1058 1059 void 1060 in_pcbinswildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1061 { 1062 struct inpcontainer *ic; 1063 struct inpcontainerhead *bucket; 1064 1065 bucket = &pcbinfo->wildcardhashbase[ 1066 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1067 1068 ic = malloc(sizeof(struct inpcontainer), M_TEMP, M_INTWAIT); 1069 ic->ic_inp = inp; 1070 LIST_INSERT_HEAD(bucket, ic, ic_list); 1071 } 1072 1073 /* 1074 * Insert PCB into wildcard hash table. 1075 */ 1076 void 1077 in_pcbinswildcardhash(struct inpcb *inp) 1078 { 1079 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1080 1081 KKASSERT(pcbinfo != NULL); 1082 1083 in_pcbinswildcardhash_oncpu(inp, pcbinfo); 1084 inp->inp_flags |= INP_WILDCARD; 1085 } 1086 1087 void 1088 in_pcbremwildcardhash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) 1089 { 1090 struct inpcontainer *ic; 1091 struct inpcontainerhead *head; 1092 1093 /* find bucket */ 1094 head = &pcbinfo->wildcardhashbase[ 1095 INP_PCBWILDCARDHASH(inp->inp_lport, pcbinfo->wildcardhashmask)]; 1096 1097 LIST_FOREACH(ic, head, ic_list) { 1098 if (ic->ic_inp == inp) 1099 goto found; 1100 } 1101 return; /* not found! */ 1102 1103 found: 1104 LIST_REMOVE(ic, ic_list); /* remove container from bucket chain */ 1105 free(ic, M_TEMP); /* deallocate container */ 1106 } 1107 1108 /* 1109 * Remove PCB from wildcard hash table. 1110 */ 1111 void 1112 in_pcbremwildcardhash(struct inpcb *inp) 1113 { 1114 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1115 1116 KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard")); 1117 in_pcbremwildcardhash_oncpu(inp, pcbinfo); 1118 inp->inp_flags &= ~INP_WILDCARD; 1119 } 1120 1121 /* 1122 * Remove PCB from various lists. 1123 */ 1124 void 1125 in_pcbremlists(inp) 1126 struct inpcb *inp; 1127 { 1128 if (inp->inp_lport) { 1129 struct inpcbport *phd = inp->inp_phd; 1130 1131 LIST_REMOVE(inp, inp_portlist); 1132 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1133 LIST_REMOVE(phd, phd_hash); 1134 free(phd, M_PCB); 1135 } 1136 } 1137 if (inp->inp_flags & INP_WILDCARD) { 1138 in_pcbremwildcardhash(inp); 1139 } else if (inp->inp_flags & INP_CONNECTED) { 1140 in_pcbremconnhash(inp); 1141 } 1142 LIST_REMOVE(inp, inp_list); 1143 inp->inp_pcbinfo->ipi_count--; 1144 } 1145 1146 int 1147 prison_xinpcb(struct thread *td, struct inpcb *inp) 1148 { 1149 struct ucred *cr; 1150 1151 if (td->td_proc == NULL) 1152 return (0); 1153 cr = td->td_proc->p_ucred; 1154 if (cr->cr_prison == NULL) 1155 return (0); 1156 if (ntohl(inp->inp_laddr.s_addr) == cr->cr_prison->pr_ip) 1157 return (0); 1158 return (1); 1159 } 1160 1161 int 1162 in_pcblist_global(SYSCTL_HANDLER_ARGS) 1163 { 1164 struct inpcbinfo *pcbinfo = arg1; 1165 struct inpcb *inp, *marker; 1166 struct xinpcb xi; 1167 int error, i, n; 1168 inp_gen_t gencnt; 1169 1170 /* 1171 * The process of preparing the TCB list is too time-consuming and 1172 * resource-intensive to repeat twice on every request. 1173 */ 1174 if (req->oldptr == NULL) { 1175 n = pcbinfo->ipi_count; 1176 req->oldidx = (n + n/8 + 10) * sizeof(struct xinpcb); 1177 return 0; 1178 } 1179 1180 if (req->newptr != NULL) 1181 return EPERM; 1182 1183 /* 1184 * OK, now we're committed to doing something. Re-fetch ipi_count 1185 * after obtaining the generation count. 1186 */ 1187 gencnt = pcbinfo->ipi_gencnt; 1188 n = pcbinfo->ipi_count; 1189 1190 marker = malloc(sizeof(struct inpcb), M_TEMP, M_WAITOK|M_ZERO); 1191 marker->inp_flags |= INP_PLACEMARKER; 1192 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 1193 1194 i = 0; 1195 error = 0; 1196 1197 while ((inp = LIST_NEXT(marker, inp_list)) != NULL && i < n) { 1198 LIST_REMOVE(marker, inp_list); 1199 LIST_INSERT_AFTER(inp, marker, inp_list); 1200 1201 if (inp->inp_flags & INP_PLACEMARKER) 1202 continue; 1203 if (inp->inp_gencnt > gencnt) 1204 continue; 1205 if (prison_xinpcb(req->td, inp)) 1206 continue; 1207 xi.xi_len = sizeof xi; 1208 bcopy(inp, &xi.xi_inp, sizeof *inp); 1209 if (inp->inp_socket) 1210 sotoxsocket(inp->inp_socket, &xi.xi_socket); 1211 if ((error = SYSCTL_OUT(req, &xi, sizeof xi)) != 0) 1212 break; 1213 ++i; 1214 } 1215 LIST_REMOVE(marker, inp_list); 1216 if (error == 0 && i < n) { 1217 bzero(&xi, sizeof(xi)); 1218 xi.xi_len = sizeof(xi); 1219 while (i < n) { 1220 error = SYSCTL_OUT(req, &xi, sizeof xi); 1221 ++i; 1222 } 1223 } 1224 free(marker, M_TEMP); 1225 return(error); 1226 } 1227