1 /* $FreeBSD: src/sys/netinet6/in6_pcb.c,v 1.10.2.9 2003/01/24 05:11:35 sam Exp $ */ 2 /* $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1991, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 63 */ 64 65 #include "opt_inet.h" 66 #include "opt_inet6.h" 67 68 #include <sys/param.h> 69 #include <sys/systm.h> 70 #include <sys/malloc.h> 71 #include <sys/mbuf.h> 72 #include <sys/domain.h> 73 #include <sys/protosw.h> 74 #include <sys/socket.h> 75 #include <sys/socketvar.h> 76 #include <sys/sockio.h> 77 #include <sys/errno.h> 78 #include <sys/time.h> 79 #include <sys/proc.h> 80 #include <sys/priv.h> 81 #include <sys/jail.h> 82 83 #include <sys/msgport2.h> 84 85 #include <vm/vm_zone.h> 86 87 #include <net/if.h> 88 #include <net/if_types.h> 89 #include <net/route.h> 90 #include <net/netisr2.h> 91 92 #include <netinet/in.h> 93 #include <netinet/in_var.h> 94 #include <netinet/in_systm.h> 95 #include <netinet/ip6.h> 96 #include <netinet/ip_var.h> 97 #include <netinet6/ip6_var.h> 98 #include <netinet6/nd6.h> 99 #include <netinet/in_pcb.h> 100 #include <netinet6/in6_pcb.h> 101 102 struct in6_addr zeroin6_addr; 103 104 int 105 in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 106 { 107 struct socket *so = inp->inp_socket; 108 struct sockaddr_in6 jsin6; 109 int error; 110 111 if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 112 return (EINVAL); 113 114 if (nam) { 115 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 116 struct inpcbinfo *pcbinfo; 117 struct inpcbportinfo *portinfo; 118 struct inpcbporthead *porthash; 119 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 120 struct ucred *cred = NULL; 121 struct inpcb *t; 122 u_short lport, lport_ho; 123 124 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 125 wild = 1; 126 if (td->td_proc != NULL) 127 cred = td->td_proc->p_ucred; 128 129 if (nam->sa_len != sizeof(*sin6)) 130 return (EINVAL); 131 /* 132 * family check. 133 */ 134 if (nam->sa_family != AF_INET6) 135 return (EAFNOSUPPORT); 136 137 /* Reject v4-mapped address */ 138 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 139 return (EADDRNOTAVAIL); 140 141 if (!prison_replace_wildcards(td, nam)) 142 return (EINVAL); 143 144 /* KAME hack: embed scopeid */ 145 if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0) 146 return (EINVAL); 147 /* this must be cleared for ifa_ifwithaddr() */ 148 sin6->sin6_scope_id = 0; 149 150 lport = sin6->sin6_port; 151 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 152 /* 153 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 154 * allow compepte duplication of binding if 155 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 156 * and a multicast address is bound on both 157 * new and duplicated sockets. 158 */ 159 if (so->so_options & SO_REUSEADDR) 160 reuseport = SO_REUSEADDR|SO_REUSEPORT; 161 } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 162 struct ifaddr *ia = NULL; 163 164 sin6->sin6_port = 0; /* yech... */ 165 if (!prison_replace_wildcards(td, (struct sockaddr *)sin6)) { 166 sin6->sin6_addr = kin6addr_any; 167 return (EINVAL); 168 } 169 if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == NULL) 170 return (EADDRNOTAVAIL); 171 172 /* 173 * XXX: bind to an anycast address might accidentally 174 * cause sending a packet with anycast source address. 175 * We should allow to bind to a deprecated address, since 176 * the application dares to use it. 177 */ 178 if (ia && 179 ((struct in6_ifaddr *)ia)->ia6_flags & 180 (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) 181 return (EADDRNOTAVAIL); 182 } 183 184 inp->in6p_laddr = sin6->sin6_addr; 185 186 if (lport == 0) 187 goto auto_select; 188 lport_ho = ntohs(lport); 189 190 /* GROSS */ 191 if (lport_ho < IPV6PORT_RESERVED && cred && 192 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) { 193 inp->in6p_laddr = kin6addr_any; 194 return (EACCES); 195 } 196 197 /* 198 * Locate the proper portinfo based on lport 199 */ 200 pcbinfo = inp->inp_pcbinfo; 201 portinfo = 202 &pcbinfo->portinfo[lport_ho % pcbinfo->portinfo_cnt]; 203 KKASSERT((lport_ho % pcbinfo->portinfo_cnt) == 204 portinfo->offset); 205 206 /* 207 * This has to be atomic. If the porthash is shared across 208 * multiple protocol threads (aka tcp) then the token must 209 * be held. 210 */ 211 porthash = in_pcbporthash_head(portinfo, lport); 212 GET_PORTHASH_TOKEN(porthash); 213 214 if (so->so_cred->cr_uid != 0 && 215 !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 216 t = in6_pcblookup_local(porthash, &sin6->sin6_addr, 217 lport, INPLOOKUP_WILDCARD, 218 cred); 219 if (t && 220 (so->so_cred->cr_uid != 221 t->inp_socket->so_cred->cr_uid)) { 222 inp->in6p_laddr = kin6addr_any; 223 error = EADDRINUSE; 224 goto done; 225 } 226 } 227 if (cred && cred->cr_prison && 228 !prison_replace_wildcards(td, nam)) { 229 inp->in6p_laddr = kin6addr_any; 230 error = EADDRNOTAVAIL; 231 goto done; 232 } 233 t = in6_pcblookup_local(porthash, &sin6->sin6_addr, lport, 234 wild, cred); 235 if (t && (reuseport & t->inp_socket->so_options) == 0) { 236 inp->in6p_laddr = kin6addr_any; 237 error = EADDRINUSE; 238 goto done; 239 } 240 241 inp->inp_lport = lport; 242 in_pcbinsporthash(porthash, inp); 243 error = 0; 244 done: 245 REL_PORTHASH_TOKEN(porthash); 246 return (error); 247 } else { 248 auto_select: 249 jsin6.sin6_addr = inp->in6p_laddr; 250 jsin6.sin6_family = AF_INET6; 251 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin6)) { 252 inp->in6p_laddr = kin6addr_any; 253 inp->inp_lport = 0; 254 return (EINVAL); 255 } 256 257 return in6_pcbsetlport(&inp->in6p_laddr, inp, td); 258 } 259 } 260 261 /* 262 * Transform old in6_pcbconnect() into an inner subroutine for new 263 * in6_pcbconnect(): Do some validity-checking on the remote 264 * address (in mbuf 'nam') and then determine local host address 265 * (i.e., which interface) to use to access that remote host. 266 * 267 * This preserves definition of in6_pcbconnect(), while supporting a 268 * slightly different version for T/TCP. (This is more than 269 * a bit of a kludge, but cleaning up the internal interfaces would 270 * have forced minor changes in every protocol). 271 */ 272 273 int 274 in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, 275 struct in6_addr **plocal_addr6, struct thread *td) 276 { 277 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 278 struct ifnet *ifp = NULL; 279 int error = 0; 280 281 if (nam->sa_len != sizeof (*sin6)) 282 return (EINVAL); 283 if (sin6->sin6_family != AF_INET6) 284 return (EAFNOSUPPORT); 285 if (sin6->sin6_port == 0) 286 return (EADDRNOTAVAIL); 287 288 /* KAME hack: embed scopeid */ 289 if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0) 290 return EINVAL; 291 292 if (in6_ifaddr) { 293 /* 294 * If the destination address is UNSPECIFIED addr, 295 * use the loopback addr, e.g ::1. 296 */ 297 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 298 sin6->sin6_addr = kin6addr_loopback; 299 } 300 { 301 /* 302 * XXX: in6_selectsrc might replace the bound local address 303 * with the address specified by setsockopt(IPV6_PKTINFO). 304 * Is it the intended behavior? 305 */ 306 *plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, 307 inp->in6p_moptions, 308 &inp->in6p_route, 309 &inp->in6p_laddr, &error, td); 310 if (*plocal_addr6 == NULL) { 311 if (error == 0) 312 error = EADDRNOTAVAIL; 313 return (error); 314 } 315 /* 316 * Don't do pcblookup call here; return interface in 317 * plocal_addr6 318 * and exit to caller, that will do the lookup. 319 */ 320 } 321 322 if (inp->in6p_route.ro_rt) 323 ifp = inp->in6p_route.ro_rt->rt_ifp; 324 325 return (0); 326 } 327 328 /* 329 * Outer subroutine: 330 * Connect from a socket to a specified address. 331 * Both address and port must be specified in argument sin. 332 * If don't have a local address for this socket yet, 333 * then pick one. 334 */ 335 int 336 in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 337 { 338 struct in6_addr *addr6; 339 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 340 int error; 341 342 /* Reject v4-mapped address */ 343 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 344 return EADDRNOTAVAIL; 345 346 /* 347 * Call inner routine, to assign local interface address. 348 * in6_pcbladdr() may automatically fill in sin6_scope_id. 349 */ 350 if ((error = in6_pcbladdr(inp, nam, &addr6, td)) != 0) 351 return (error); 352 353 if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, 354 sin6->sin6_port, 355 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 356 ? addr6 : &inp->in6p_laddr, 357 inp->inp_lport, 0, NULL) != NULL) { 358 return (EADDRINUSE); 359 } 360 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 361 if (inp->inp_lport == 0) { 362 error = in6_pcbbind(inp, NULL, td); 363 if (error) 364 return (error); 365 } 366 inp->in6p_laddr = *addr6; 367 } 368 inp->in6p_faddr = sin6->sin6_addr; 369 inp->inp_fport = sin6->sin6_port; 370 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 371 inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 372 if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) 373 inp->in6p_flowinfo |= 374 (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK); 375 376 in_pcbinsconnhash(inp); 377 return (0); 378 } 379 380 void 381 in6_pcbdisconnect(struct inpcb *inp) 382 { 383 bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); 384 inp->inp_fport = 0; 385 /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 386 inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 387 in_pcbremconnhash(inp); 388 if (inp->inp_socket->so_state & SS_NOFDREF) 389 in6_pcbdetach(inp); 390 } 391 392 void 393 in6_pcbdetach(struct inpcb *inp) 394 { 395 struct socket *so = inp->inp_socket; 396 struct inpcbinfo *ipi = inp->inp_pcbinfo; 397 398 inp->inp_gencnt = ++ipi->ipi_gencnt; 399 in_pcbremlists(inp); 400 so->so_pcb = NULL; 401 KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); 402 sofree(so); /* remove pcb ref */ 403 404 if (inp->in6p_options) 405 m_freem(inp->in6p_options); 406 ip6_freepcbopts(inp->in6p_outputopts); 407 ip6_freemoptions(inp->in6p_moptions); 408 if (inp->in6p_route.ro_rt) 409 rtfree(inp->in6p_route.ro_rt); 410 /* Check and free IPv4 related resources in case of mapped addr */ 411 if (inp->inp_options) 412 m_free(inp->inp_options); 413 ip_freemoptions(inp->inp_moptions); 414 415 kfree(inp, M_PCB); 416 } 417 418 /* 419 * The socket may have an invalid PCB, i.e. NULL. For example, a TCP 420 * socket received RST. 421 */ 422 static int 423 in6_setsockaddr(struct socket *so, struct sockaddr **nam) 424 { 425 struct inpcb *inp; 426 struct sockaddr_in6 *sin6; 427 428 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 429 inp = so->so_pcb; 430 if (!inp) 431 return EINVAL; 432 433 sin6 = kmalloc(sizeof *sin6, M_SONAME, M_WAITOK | M_ZERO); 434 sin6->sin6_family = AF_INET6; 435 sin6->sin6_len = sizeof(*sin6); 436 sin6->sin6_port = inp->inp_lport; 437 sin6->sin6_addr = inp->in6p_laddr; 438 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 439 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); 440 else 441 sin6->sin6_scope_id = 0; /*XXX*/ 442 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 443 sin6->sin6_addr.s6_addr16[1] = 0; 444 445 *nam = (struct sockaddr *)sin6; 446 return 0; 447 } 448 449 void 450 in6_setsockaddr_dispatch(netmsg_t msg) 451 { 452 int error; 453 454 error = in6_setsockaddr(msg->sockaddr.base.nm_so, msg->sockaddr.nm_nam); 455 lwkt_replymsg(&msg->sockaddr.base.lmsg, error); 456 } 457 458 void 459 in6_setpeeraddr_dispatch(netmsg_t msg) 460 { 461 int error; 462 463 error = in6_setpeeraddr(msg->peeraddr.base.nm_so, msg->peeraddr.nm_nam); 464 lwkt_replymsg(&msg->peeraddr.base.lmsg, error); 465 } 466 467 /* 468 * The socket may have an invalid PCB, i.e. NULL. For example, a TCP 469 * socket received RST. 470 */ 471 int 472 in6_setpeeraddr(struct socket *so, struct sockaddr **nam) 473 { 474 struct inpcb *inp; 475 struct sockaddr_in6 *sin6; 476 477 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 478 inp = so->so_pcb; 479 if (!inp) 480 return EINVAL; 481 482 sin6 = kmalloc(sizeof(*sin6), M_SONAME, M_WAITOK | M_ZERO); 483 sin6->sin6_family = AF_INET6; 484 sin6->sin6_len = sizeof(struct sockaddr_in6); 485 sin6->sin6_port = inp->inp_fport; 486 sin6->sin6_addr = inp->in6p_faddr; 487 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 488 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); 489 else 490 sin6->sin6_scope_id = 0; /*XXX*/ 491 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 492 sin6->sin6_addr.s6_addr16[1] = 0; 493 494 *nam = (struct sockaddr *)sin6; 495 return 0; 496 } 497 498 /* 499 * Pass some notification to all connections of a protocol 500 * associated with address dst. The local address and/or port numbers 501 * may be specified to limit the search. The "usual action" will be 502 * taken, depending on the ctlinput cmd. The caller must filter any 503 * cmds that are uninteresting (e.g., no error in the map). 504 * Call the protocol specific routine (if any) to report 505 * any errors for each matching socket. 506 */ 507 void 508 in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, in_port_t fport, 509 const struct sockaddr *src, in_port_t lport, int cmd, int arg, 510 inp_notify_t notify) 511 { 512 struct inpcb *inp, *marker; 513 struct sockaddr_in6 sa6_src, *sa6_dst; 514 u_int32_t flowinfo; 515 516 if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) 517 return; 518 519 sa6_dst = (struct sockaddr_in6 *)dst; 520 if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) 521 return; 522 523 /* 524 * note that src can be NULL when we get notify by local fragmentation. 525 */ 526 sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; 527 flowinfo = sa6_src.sin6_flowinfo; 528 529 /* 530 * Redirects go to all references to the destination, 531 * and use in6_rtchange to invalidate the route cache. 532 * Dead host indications: also use in6_rtchange to invalidate 533 * the cache, and deliver the error to all the sockets. 534 * Otherwise, if we have knowledge of the local port and address, 535 * deliver only to that socket. 536 */ 537 if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { 538 fport = 0; 539 lport = 0; 540 bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); 541 542 if (cmd != PRC_HOSTDEAD) 543 notify = in6_rtchange; 544 } 545 if (cmd != PRC_MSGSIZE) 546 arg = inet6ctlerrmap[cmd]; 547 548 marker = in_pcbmarker(); 549 550 GET_PCBINFO_TOKEN(pcbinfo); 551 552 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 553 while ((inp = LIST_NEXT(marker, inp_list)) != NULL) { 554 LIST_REMOVE(marker, inp_list); 555 LIST_INSERT_AFTER(inp, marker, inp_list); 556 557 if (inp->inp_flags & INP_PLACEMARKER) 558 continue; 559 560 if (!INP_ISIPV6(inp)) 561 continue; 562 /* 563 * If the error designates a new path MTU for a destination 564 * and the application (associated with this socket) wanted to 565 * know the value, notify. Note that we notify for all 566 * disconnected sockets if the corresponding application 567 * wanted. This is because some UDP applications keep sending 568 * sockets disconnected. 569 * XXX: should we avoid to notify the value to TCP sockets? 570 */ 571 if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 && 572 (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || 573 IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) { 574 ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst, &arg); 575 } 576 577 /* 578 * Detect if we should notify the error. If no source and 579 * destination ports are specifed, but non-zero flowinfo and 580 * local address match, notify the error. This is the case 581 * when the error is delivered with an encrypted buffer 582 * by ESP. Otherwise, just compare addresses and ports 583 * as usual. 584 */ 585 if (lport == 0 && fport == 0 && flowinfo && 586 inp->inp_socket != NULL && 587 flowinfo == (inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) && 588 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) 589 goto do_notify; 590 else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, 591 &sa6_dst->sin6_addr) || 592 inp->inp_socket == 0 || 593 (lport && inp->inp_lport != lport) || 594 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && 595 !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, 596 &sa6_src.sin6_addr)) || 597 (fport && inp->inp_fport != fport)) 598 continue; 599 600 do_notify: 601 if (notify) 602 (*notify)(inp, arg); 603 } 604 LIST_REMOVE(marker, inp_list); 605 606 REL_PCBINFO_TOKEN(pcbinfo); 607 } 608 609 /* 610 * Lookup a PCB based on the local address and port. 611 */ 612 struct inpcb * 613 in6_pcblookup_local(struct inpcbporthead *porthash, 614 const struct in6_addr *laddr, u_int lport_arg, int wild_okay, 615 struct ucred *cred) 616 { 617 struct prison *pscan; 618 struct prison *pr; 619 struct inpcb *inp; 620 int matchwild = 3, wildcard; 621 u_short lport = lport_arg; 622 struct inpcbport *phd; 623 struct inpcb *match = NULL; 624 625 /* 626 * If the porthashbase is shared across several cpus, it must 627 * have been locked. 628 */ 629 ASSERT_PORTHASH_TOKEN_HELD(porthash); 630 631 /* 632 * Best fit PCB lookup. 633 * 634 * First see if this local port is in use by looking on the 635 * port hash list. 636 */ 637 LIST_FOREACH(phd, porthash, phd_hash) { 638 if (phd->phd_port == lport) 639 break; 640 } 641 642 if (phd != NULL) { 643 pr = cred ? cred->cr_prison : NULL; 644 645 /* 646 * Port is in use by one or more PCBs. Look for best 647 * fit. 648 */ 649 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 650 wildcard = 0; 651 if (!INP_ISIPV6(inp)) 652 continue; 653 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) 654 wildcard++; 655 656 if (inp->inp_socket && inp->inp_socket->so_cred) 657 pscan = inp->inp_socket->so_cred->cr_prison; 658 else 659 pscan = NULL; 660 if (pr != pscan) 661 continue; 662 663 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 664 if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) 665 wildcard++; 666 } else { 667 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 668 wildcard++; 669 else if (!IN6_ARE_ADDR_EQUAL( 670 &inp->in6p_laddr, laddr)) 671 continue; 672 } 673 if (wildcard && !wild_okay) 674 continue; 675 if (wildcard < matchwild) { 676 match = inp; 677 matchwild = wildcard; 678 if (wildcard == 0) 679 break; 680 } 681 } 682 } 683 return (match); 684 } 685 686 void 687 in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) 688 { 689 struct in6pcb *in6p, *marker; 690 struct ip6_moptions *im6o; 691 struct in6_multi_mship *imm, *nimm; 692 693 /* 694 * We only need to make sure that we are in netisr0, where all 695 * multicast operation happen. We could check inpcbinfo which 696 * does not belong to netisr0 by holding the inpcbinfo's token. 697 * In this case, the pcbinfo must be able to be shared, i.e. 698 * pcbinfo->infotoken is not NULL. 699 */ 700 ASSERT_NETISR0; 701 KASSERT(pcbinfo->cpu == 0 || pcbinfo->infotoken != NULL, 702 ("pcbinfo could not be shared")); 703 704 /* 705 * Get a marker for the current netisr (netisr0). 706 * 707 * It is possible that the multicast address deletion blocks, 708 * which could cause temporary token releasing. So we use 709 * inpcb marker here to get a coherent view of the inpcb list. 710 * 711 * While, on the other hand, moptions are only added and deleted 712 * in netisr0, so we would not see staled moption or miss moption 713 * even if the token was released due to the blocking multicast 714 * address deletion. 715 */ 716 marker = in_pcbmarker(); 717 718 GET_PCBINFO_TOKEN(pcbinfo); 719 720 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 721 while ((in6p = LIST_NEXT(marker, inp_list)) != NULL) { 722 LIST_REMOVE(marker, inp_list); 723 LIST_INSERT_AFTER(in6p, marker, inp_list); 724 725 if (in6p->in6p_flags & INP_PLACEMARKER) 726 continue; 727 im6o = in6p->in6p_moptions; 728 if (INP_ISIPV6(in6p) && im6o) { 729 /* 730 * Unselect the outgoing interface if it is being 731 * detached. 732 */ 733 if (im6o->im6o_multicast_ifp == ifp) 734 im6o->im6o_multicast_ifp = NULL; 735 736 /* 737 * Drop multicast group membership if we joined 738 * through the interface being detached. 739 * XXX controversial - is it really legal for kernel 740 * to force this? 741 */ 742 for (imm = im6o->im6o_memberships.lh_first; 743 imm != NULL; imm = nimm) { 744 nimm = imm->i6mm_chain.le_next; 745 if (imm->i6mm_maddr->in6m_ifp == ifp) { 746 LIST_REMOVE(imm, i6mm_chain); 747 in6_delmulti(imm->i6mm_maddr); 748 kfree(imm, M_IPMADDR); 749 } 750 } 751 } 752 } 753 LIST_REMOVE(marker, inp_list); 754 755 REL_PCBINFO_TOKEN(pcbinfo); 756 } 757 758 /* 759 * Check for alternatives when higher level complains 760 * about service problems. For now, invalidate cached 761 * routing information. If the route was created dynamically 762 * (by a redirect), time to try a default gateway again. 763 */ 764 void 765 in6_losing(struct inpcb *in6p) 766 { 767 struct rtentry *rt; 768 struct rt_addrinfo info; 769 770 if ((rt = in6p->in6p_route.ro_rt) != NULL) { 771 bzero((caddr_t)&info, sizeof(info)); 772 info.rti_flags = rt->rt_flags; 773 info.rti_info[RTAX_DST] = rt_key(rt); 774 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 775 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 776 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 777 if (rt->rt_flags & RTF_DYNAMIC) { 778 rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, 779 rt_mask(rt), rt->rt_flags, NULL); 780 } 781 in6p->in6p_route.ro_rt = NULL; 782 rtfree(rt); 783 /* 784 * A new route can be allocated 785 * the next time output is attempted. 786 */ 787 } 788 } 789 790 /* 791 * After a routing change, flush old routing 792 * and allocate a (hopefully) better one. 793 */ 794 void 795 in6_rtchange(struct inpcb *inp, int error) 796 { 797 if (inp->in6p_route.ro_rt) { 798 rtfree(inp->in6p_route.ro_rt); 799 inp->in6p_route.ro_rt = 0; 800 /* 801 * A new route can be allocated the next time 802 * output is attempted. 803 */ 804 } 805 } 806 807 /* 808 * Lookup PCB in hash list. 809 */ 810 struct inpcb * 811 in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, 812 u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, 813 int wildcard, struct ifnet *ifp) 814 { 815 struct inpcbhead *head; 816 struct inpcb *inp; 817 struct inpcb *jinp = NULL; 818 u_short fport = fport_arg, lport = lport_arg; 819 820 /* 821 * First look for an exact match. 822 */ 823 head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr->s6_addr32[3] /* XXX */, 824 fport, 825 laddr->s6_addr32[3], /* XXX JH */ 826 lport, 827 pcbinfo->hashmask)]; 828 LIST_FOREACH(inp, head, inp_hash) { 829 if (!INP_ISIPV6(inp)) 830 continue; 831 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && 832 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && 833 inp->inp_fport == fport && 834 inp->inp_lport == lport) { 835 /* 836 * Found. 837 */ 838 if (inp->inp_socket == NULL || 839 inp->inp_socket->so_cred->cr_prison == NULL) { 840 return (inp); 841 } 842 if (jinp == NULL) 843 jinp = inp; 844 } 845 } 846 if (jinp != NULL) 847 return(jinp); 848 849 if (wildcard) { 850 struct inpcb *local_wild = NULL; 851 struct inpcb *jinp_wild = NULL; 852 struct inpcontainer *ic; 853 struct inpcontainerhead *chead; 854 struct sockaddr_in6 jsin6; 855 struct ucred *cred; 856 struct prison *pr; 857 int net_listen_ov_local = 0; 858 int net_listen_ov_wild = 0; 859 860 /* 861 * Order of socket selection: 862 * 1. non-jailed, non-wild. 863 * 2. non-jailed, wild. (allow_listen_override on) 864 * 3. jailed, non-wild. 865 * 4. jailed, wild. 866 * 5. non-jailed, wild. (allow_listen_override off) 867 * 868 * NOTE: jailed wildcards are still restricted to the jail 869 * IPs. 870 * 871 * NOTE: (1) and (3) already handled above. 872 */ 873 jsin6.sin6_family = AF_INET6; 874 chead = &pcbinfo->wildcardhashbase[INP_PCBWILDCARDHASH(lport, 875 pcbinfo->wildcardhashmask)]; 876 877 GET_PCBINFO_TOKEN(pcbinfo); 878 LIST_FOREACH(ic, chead, ic_list) { 879 inp = ic->ic_inp; 880 if (inp->inp_flags & INP_PLACEMARKER) 881 continue; 882 883 /* 884 * Basdic validation 885 */ 886 if (!INP_ISIPV6(inp)) 887 continue; 888 if (inp->inp_lport != lport) 889 continue; 890 891 /* 892 * Calculate prison, setup jsin for jailed_ip() 893 * check. 894 */ 895 jsin6.sin6_addr = *laddr; 896 pr = NULL; 897 cred = NULL; 898 if (inp->inp_socket) { 899 cred = inp->inp_socket->so_cred; 900 if (cred) 901 pr = cred->cr_prison; 902 } 903 904 /* 905 * Assign jinp, jinp_wild, and local_wild as 906 * appropriate, track whether the jail supports 907 * listen overrides. 908 */ 909 if (pr) { 910 if (!jailed_ip(pr, (struct sockaddr *)&jsin6)) 911 continue; 912 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) 913 && jinp == NULL) { 914 jinp = inp; 915 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_NET_LISTEN_OVERRIDE)) 916 net_listen_ov_local = 1; 917 } 918 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && 919 jinp_wild == NULL) { 920 jinp_wild = inp; 921 if (PRISON_CAP_ISSET(pr->pr_caps, PRISON_CAP_NET_LISTEN_OVERRIDE)) 922 net_listen_ov_wild = 1; 923 } 924 } else { 925 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { 926 REL_PCBINFO_TOKEN(pcbinfo); 927 return (inp); 928 } 929 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) 930 local_wild = inp; 931 } 932 } 933 REL_PCBINFO_TOKEN(pcbinfo); 934 935 if (net_listen_ov_local) 936 return jinp; 937 if (net_listen_ov_wild) 938 return jinp_wild; 939 if (local_wild) 940 return local_wild; 941 if (jinp) 942 return jinp; 943 return (jinp_wild); 944 } 945 946 /* 947 * Not found. 948 */ 949 return (NULL); 950 } 951 952 void 953 init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) 954 { 955 struct ip6_hdr *ip; 956 957 ip = mtod(m, struct ip6_hdr *); 958 bzero(sin6, sizeof(*sin6)); 959 sin6->sin6_len = sizeof(*sin6); 960 sin6->sin6_family = AF_INET6; 961 sin6->sin6_addr = ip->ip6_src; 962 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 963 sin6->sin6_addr.s6_addr16[1] = 0; 964 sin6->sin6_scope_id = 965 (m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 966 ? m->m_pkthdr.rcvif->if_index : 0; 967 968 return; 969 } 970 971 void 972 in6_savefaddr(struct socket *so, const struct sockaddr *faddr) 973 { 974 struct sockaddr_in6 *sin6; 975 976 KASSERT(faddr->sa_family == AF_INET6, 977 ("not AF_INET6 faddr %d", faddr->sa_family)); 978 979 sin6 = kmalloc(sizeof(*sin6), M_SONAME, M_WAITOK | M_ZERO); 980 sin6->sin6_family = AF_INET6; 981 sin6->sin6_len = sizeof(*sin6); 982 983 sin6->sin6_port = ((const struct sockaddr_in6 *)faddr)->sin6_port; 984 sin6->sin6_addr = ((const struct sockaddr_in6 *)faddr)->sin6_addr; 985 986 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 987 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); 988 else 989 sin6->sin6_scope_id = 0; /*XXX*/ 990 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 991 sin6->sin6_addr.s6_addr16[1] = 0; 992 993 so->so_faddr = (struct sockaddr *)sin6; 994 } 995