1 /* $FreeBSD: src/sys/netinet6/in6_pcb.c,v 1.10.2.9 2003/01/24 05:11:35 sam Exp $ */ 2 /* $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 */ 33 34 /* 35 * Copyright (c) 1982, 1986, 1991, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 63 */ 64 65 #include "opt_inet.h" 66 #include "opt_inet6.h" 67 #include "opt_ipsec.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/malloc.h> 72 #include <sys/mbuf.h> 73 #include <sys/domain.h> 74 #include <sys/protosw.h> 75 #include <sys/socket.h> 76 #include <sys/socketvar.h> 77 #include <sys/sockio.h> 78 #include <sys/errno.h> 79 #include <sys/time.h> 80 #include <sys/proc.h> 81 #include <sys/priv.h> 82 #include <sys/jail.h> 83 84 #include <sys/thread2.h> 85 #include <sys/msgport2.h> 86 87 #include <vm/vm_zone.h> 88 89 #include <net/if.h> 90 #include <net/if_types.h> 91 #include <net/route.h> 92 #include <net/netisr2.h> 93 94 #include <netinet/in.h> 95 #include <netinet/in_var.h> 96 #include <netinet/in_systm.h> 97 #include <netinet/ip6.h> 98 #include <netinet/ip_var.h> 99 #include <netinet6/ip6_var.h> 100 #include <netinet6/nd6.h> 101 #include <netinet/in_pcb.h> 102 #include <netinet6/in6_pcb.h> 103 104 #ifdef IPSEC 105 #include <netinet6/ipsec.h> 106 #ifdef INET6 107 #include <netinet6/ipsec6.h> 108 #endif 109 #include <netinet6/ah.h> 110 #ifdef INET6 111 #include <netinet6/ah6.h> 112 #endif 113 #include <netproto/key/key.h> 114 #endif /* IPSEC */ 115 116 #ifdef FAST_IPSEC 117 #include <netproto/ipsec/ipsec.h> 118 #include <netproto/ipsec/ipsec6.h> 119 #include <netproto/ipsec/key.h> 120 #define IPSEC 121 #endif /* FAST_IPSEC */ 122 123 struct in6_addr zeroin6_addr; 124 125 int 126 in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 127 { 128 struct socket *so = inp->inp_socket; 129 struct sockaddr_in6 jsin6; 130 int error; 131 132 if (!in6_ifaddr) /* XXX broken! */ 133 return (EADDRNOTAVAIL); 134 if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 135 return (EINVAL); 136 137 if (nam) { 138 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 139 struct inpcbinfo *pcbinfo; 140 struct inpcbportinfo *portinfo; 141 struct inpcbporthead *porthash; 142 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 143 struct ucred *cred = NULL; 144 struct inpcb *t; 145 u_short lport, lport_ho; 146 147 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 148 wild = 1; 149 if (td->td_proc != NULL) 150 cred = td->td_proc->p_ucred; 151 152 if (nam->sa_len != sizeof(*sin6)) 153 return (EINVAL); 154 /* 155 * family check. 156 */ 157 if (nam->sa_family != AF_INET6) 158 return (EAFNOSUPPORT); 159 160 /* Reject v4-mapped address */ 161 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 162 return (EADDRNOTAVAIL); 163 164 if (!prison_replace_wildcards(td, nam)) 165 return (EINVAL); 166 167 /* KAME hack: embed scopeid */ 168 if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0) 169 return (EINVAL); 170 /* this must be cleared for ifa_ifwithaddr() */ 171 sin6->sin6_scope_id = 0; 172 173 lport = sin6->sin6_port; 174 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 175 /* 176 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 177 * allow compepte duplication of binding if 178 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 179 * and a multicast address is bound on both 180 * new and duplicated sockets. 181 */ 182 if (so->so_options & SO_REUSEADDR) 183 reuseport = SO_REUSEADDR|SO_REUSEPORT; 184 } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 185 struct ifaddr *ia = NULL; 186 187 sin6->sin6_port = 0; /* yech... */ 188 if (!prison_replace_wildcards(td, (struct sockaddr *)sin6)) { 189 sin6->sin6_addr = kin6addr_any; 190 return (EINVAL); 191 } 192 if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == NULL) 193 return (EADDRNOTAVAIL); 194 195 /* 196 * XXX: bind to an anycast address might accidentally 197 * cause sending a packet with anycast source address. 198 * We should allow to bind to a deprecated address, since 199 * the application dares to use it. 200 */ 201 if (ia && 202 ((struct in6_ifaddr *)ia)->ia6_flags & 203 (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) 204 return (EADDRNOTAVAIL); 205 } 206 207 inp->in6p_laddr = sin6->sin6_addr; 208 209 if (lport == 0) 210 goto auto_select; 211 lport_ho = ntohs(lport); 212 213 /* GROSS */ 214 if (lport_ho < IPV6PORT_RESERVED && cred && 215 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) { 216 inp->in6p_laddr = kin6addr_any; 217 return (EACCES); 218 } 219 220 /* 221 * Locate the proper portinfo based on lport 222 */ 223 pcbinfo = inp->inp_pcbinfo; 224 portinfo = 225 &pcbinfo->portinfo[lport_ho & pcbinfo->portinfo_mask]; 226 KKASSERT((lport_ho & pcbinfo->portinfo_mask) == 227 portinfo->offset); 228 229 /* 230 * This has to be atomic. If the porthash is shared across 231 * multiple protocol threads (aka tcp) then the token must 232 * be held. 233 */ 234 porthash = in_pcbporthash_head(portinfo, lport); 235 GET_PORTHASH_TOKEN(porthash); 236 237 if (so->so_cred->cr_uid != 0 && 238 !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 239 t = in6_pcblookup_local(porthash, 240 &sin6->sin6_addr, lport, INPLOOKUP_WILDCARD, cred); 241 if (t && 242 (so->so_cred->cr_uid != 243 t->inp_socket->so_cred->cr_uid)) { 244 inp->in6p_laddr = kin6addr_any; 245 error = EADDRINUSE; 246 goto done; 247 } 248 } 249 if (cred && cred->cr_prison && 250 !prison_replace_wildcards(td, nam)) { 251 inp->in6p_laddr = kin6addr_any; 252 error = EADDRNOTAVAIL; 253 goto done; 254 } 255 t = in6_pcblookup_local(porthash, &sin6->sin6_addr, lport, 256 wild, cred); 257 if (t && (reuseport & t->inp_socket->so_options) == 0) { 258 inp->in6p_laddr = kin6addr_any; 259 error = EADDRINUSE; 260 goto done; 261 } 262 263 inp->inp_lport = lport; 264 in_pcbinsporthash(porthash, inp); 265 error = 0; 266 done: 267 REL_PORTHASH_TOKEN(porthash); 268 return (error); 269 } else { 270 auto_select: 271 jsin6.sin6_addr = inp->in6p_laddr; 272 jsin6.sin6_family = AF_INET6; 273 if (!prison_replace_wildcards(td, (struct sockaddr*)&jsin6)) { 274 inp->in6p_laddr = kin6addr_any; 275 inp->inp_lport = 0; 276 return (EINVAL); 277 } 278 279 return in6_pcbsetlport(&inp->in6p_laddr, inp, td); 280 } 281 } 282 283 /* 284 * Transform old in6_pcbconnect() into an inner subroutine for new 285 * in6_pcbconnect(): Do some validity-checking on the remote 286 * address (in mbuf 'nam') and then determine local host address 287 * (i.e., which interface) to use to access that remote host. 288 * 289 * This preserves definition of in6_pcbconnect(), while supporting a 290 * slightly different version for T/TCP. (This is more than 291 * a bit of a kludge, but cleaning up the internal interfaces would 292 * have forced minor changes in every protocol). 293 */ 294 295 int 296 in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, 297 struct in6_addr **plocal_addr6, struct thread *td) 298 { 299 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 300 struct ifnet *ifp = NULL; 301 int error = 0; 302 303 if (nam->sa_len != sizeof (*sin6)) 304 return (EINVAL); 305 if (sin6->sin6_family != AF_INET6) 306 return (EAFNOSUPPORT); 307 if (sin6->sin6_port == 0) 308 return (EADDRNOTAVAIL); 309 310 /* KAME hack: embed scopeid */ 311 if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0) 312 return EINVAL; 313 314 if (in6_ifaddr) { 315 /* 316 * If the destination address is UNSPECIFIED addr, 317 * use the loopback addr, e.g ::1. 318 */ 319 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 320 sin6->sin6_addr = kin6addr_loopback; 321 } 322 { 323 /* 324 * XXX: in6_selectsrc might replace the bound local address 325 * with the address specified by setsockopt(IPV6_PKTINFO). 326 * Is it the intended behavior? 327 */ 328 *plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, 329 inp->in6p_moptions, 330 &inp->in6p_route, 331 &inp->in6p_laddr, &error, td); 332 if (*plocal_addr6 == NULL) { 333 if (error == 0) 334 error = EADDRNOTAVAIL; 335 return (error); 336 } 337 /* 338 * Don't do pcblookup call here; return interface in 339 * plocal_addr6 340 * and exit to caller, that will do the lookup. 341 */ 342 } 343 344 if (inp->in6p_route.ro_rt) 345 ifp = inp->in6p_route.ro_rt->rt_ifp; 346 347 return (0); 348 } 349 350 /* 351 * Outer subroutine: 352 * Connect from a socket to a specified address. 353 * Both address and port must be specified in argument sin. 354 * If don't have a local address for this socket yet, 355 * then pick one. 356 */ 357 int 358 in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 359 { 360 struct in6_addr *addr6; 361 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 362 int error; 363 364 /* Reject v4-mapped address */ 365 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 366 return EADDRNOTAVAIL; 367 368 /* 369 * Call inner routine, to assign local interface address. 370 * in6_pcbladdr() may automatically fill in sin6_scope_id. 371 */ 372 if ((error = in6_pcbladdr(inp, nam, &addr6, td)) != 0) 373 return (error); 374 375 if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, 376 sin6->sin6_port, 377 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 378 ? addr6 : &inp->in6p_laddr, 379 inp->inp_lport, 0, NULL) != NULL) { 380 return (EADDRINUSE); 381 } 382 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 383 if (inp->inp_lport == 0) { 384 error = in6_pcbbind(inp, NULL, td); 385 if (error) 386 return (error); 387 } 388 inp->in6p_laddr = *addr6; 389 } 390 inp->in6p_faddr = sin6->sin6_addr; 391 inp->inp_fport = sin6->sin6_port; 392 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 393 inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 394 if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) 395 inp->in6p_flowinfo |= 396 (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK); 397 398 in_pcbinsconnhash(inp); 399 return (0); 400 } 401 402 void 403 in6_pcbdisconnect(struct inpcb *inp) 404 { 405 bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); 406 inp->inp_fport = 0; 407 /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 408 inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 409 in_pcbremconnhash(inp); 410 if (inp->inp_socket->so_state & SS_NOFDREF) 411 in6_pcbdetach(inp); 412 } 413 414 void 415 in6_pcbdetach(struct inpcb *inp) 416 { 417 struct socket *so = inp->inp_socket; 418 struct inpcbinfo *ipi = inp->inp_pcbinfo; 419 420 #ifdef IPSEC 421 if (inp->in6p_sp != NULL) 422 ipsec6_delete_pcbpolicy(inp); 423 #endif /* IPSEC */ 424 inp->inp_gencnt = ++ipi->ipi_gencnt; 425 in_pcbremlists(inp); 426 so->so_pcb = NULL; 427 KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); 428 sofree(so); /* remove pcb ref */ 429 430 if (inp->in6p_options) 431 m_freem(inp->in6p_options); 432 ip6_freepcbopts(inp->in6p_outputopts); 433 ip6_freemoptions(inp->in6p_moptions); 434 if (inp->in6p_route.ro_rt) 435 rtfree(inp->in6p_route.ro_rt); 436 /* Check and free IPv4 related resources in case of mapped addr */ 437 if (inp->inp_options) 438 m_free(inp->inp_options); 439 ip_freemoptions(inp->inp_moptions); 440 441 kfree(inp, M_PCB); 442 } 443 444 /* 445 * The socket may have an invalid PCB, i.e. NULL. For example, a TCP 446 * socket received RST. 447 */ 448 static int 449 in6_setsockaddr(struct socket *so, struct sockaddr **nam) 450 { 451 struct inpcb *inp; 452 struct sockaddr_in6 *sin6; 453 454 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 455 inp = so->so_pcb; 456 if (!inp) 457 return EINVAL; 458 459 sin6 = kmalloc(sizeof *sin6, M_SONAME, M_WAITOK | M_ZERO); 460 sin6->sin6_family = AF_INET6; 461 sin6->sin6_len = sizeof(*sin6); 462 sin6->sin6_port = inp->inp_lport; 463 sin6->sin6_addr = inp->in6p_laddr; 464 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 465 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); 466 else 467 sin6->sin6_scope_id = 0; /*XXX*/ 468 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 469 sin6->sin6_addr.s6_addr16[1] = 0; 470 471 *nam = (struct sockaddr *)sin6; 472 return 0; 473 } 474 475 void 476 in6_setsockaddr_dispatch(netmsg_t msg) 477 { 478 int error; 479 480 error = in6_setsockaddr(msg->sockaddr.base.nm_so, msg->sockaddr.nm_nam); 481 lwkt_replymsg(&msg->sockaddr.base.lmsg, error); 482 } 483 484 void 485 in6_setpeeraddr_dispatch(netmsg_t msg) 486 { 487 int error; 488 489 error = in6_setpeeraddr(msg->peeraddr.base.nm_so, msg->peeraddr.nm_nam); 490 lwkt_replymsg(&msg->peeraddr.base.lmsg, error); 491 } 492 493 /* 494 * The socket may have an invalid PCB, i.e. NULL. For example, a TCP 495 * socket received RST. 496 */ 497 int 498 in6_setpeeraddr(struct socket *so, struct sockaddr **nam) 499 { 500 struct inpcb *inp; 501 struct sockaddr_in6 *sin6; 502 503 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 504 inp = so->so_pcb; 505 if (!inp) 506 return EINVAL; 507 508 sin6 = kmalloc(sizeof(*sin6), M_SONAME, M_WAITOK | M_ZERO); 509 sin6->sin6_family = AF_INET6; 510 sin6->sin6_len = sizeof(struct sockaddr_in6); 511 sin6->sin6_port = inp->inp_fport; 512 sin6->sin6_addr = inp->in6p_faddr; 513 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 514 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); 515 else 516 sin6->sin6_scope_id = 0; /*XXX*/ 517 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 518 sin6->sin6_addr.s6_addr16[1] = 0; 519 520 *nam = (struct sockaddr *)sin6; 521 return 0; 522 } 523 524 /* 525 * Pass some notification to all connections of a protocol 526 * associated with address dst. The local address and/or port numbers 527 * may be specified to limit the search. The "usual action" will be 528 * taken, depending on the ctlinput cmd. The caller must filter any 529 * cmds that are uninteresting (e.g., no error in the map). 530 * Call the protocol specific routine (if any) to report 531 * any errors for each matching socket. 532 */ 533 void 534 in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, in_port_t fport, 535 const struct sockaddr *src, in_port_t lport, int cmd, int arg, 536 inp_notify_t notify) 537 { 538 struct inpcb *inp, *marker; 539 struct sockaddr_in6 sa6_src, *sa6_dst; 540 u_int32_t flowinfo; 541 542 if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) 543 return; 544 545 sa6_dst = (struct sockaddr_in6 *)dst; 546 if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) 547 return; 548 549 /* 550 * note that src can be NULL when we get notify by local fragmentation. 551 */ 552 sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; 553 flowinfo = sa6_src.sin6_flowinfo; 554 555 /* 556 * Redirects go to all references to the destination, 557 * and use in6_rtchange to invalidate the route cache. 558 * Dead host indications: also use in6_rtchange to invalidate 559 * the cache, and deliver the error to all the sockets. 560 * Otherwise, if we have knowledge of the local port and address, 561 * deliver only to that socket. 562 */ 563 if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { 564 fport = 0; 565 lport = 0; 566 bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); 567 568 if (cmd != PRC_HOSTDEAD) 569 notify = in6_rtchange; 570 } 571 if (cmd != PRC_MSGSIZE) 572 arg = inet6ctlerrmap[cmd]; 573 574 marker = in_pcbmarker(mycpuid); 575 576 GET_PCBINFO_TOKEN(pcbinfo); 577 578 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 579 while ((inp = LIST_NEXT(marker, inp_list)) != NULL) { 580 LIST_REMOVE(marker, inp_list); 581 LIST_INSERT_AFTER(inp, marker, inp_list); 582 583 if (inp->inp_flags & INP_PLACEMARKER) 584 continue; 585 586 if (!INP_ISIPV6(inp)) 587 continue; 588 /* 589 * If the error designates a new path MTU for a destination 590 * and the application (associated with this socket) wanted to 591 * know the value, notify. Note that we notify for all 592 * disconnected sockets if the corresponding application 593 * wanted. This is because some UDP applications keep sending 594 * sockets disconnected. 595 * XXX: should we avoid to notify the value to TCP sockets? 596 */ 597 if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 && 598 (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || 599 IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) { 600 ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst, &arg); 601 } 602 603 /* 604 * Detect if we should notify the error. If no source and 605 * destination ports are specifed, but non-zero flowinfo and 606 * local address match, notify the error. This is the case 607 * when the error is delivered with an encrypted buffer 608 * by ESP. Otherwise, just compare addresses and ports 609 * as usual. 610 */ 611 if (lport == 0 && fport == 0 && flowinfo && 612 inp->inp_socket != NULL && 613 flowinfo == (inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) && 614 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) 615 goto do_notify; 616 else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, 617 &sa6_dst->sin6_addr) || 618 inp->inp_socket == 0 || 619 (lport && inp->inp_lport != lport) || 620 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && 621 !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, 622 &sa6_src.sin6_addr)) || 623 (fport && inp->inp_fport != fport)) 624 continue; 625 626 do_notify: 627 if (notify) 628 (*notify)(inp, arg); 629 } 630 LIST_REMOVE(marker, inp_list); 631 632 REL_PCBINFO_TOKEN(pcbinfo); 633 } 634 635 /* 636 * Lookup a PCB based on the local address and port. 637 */ 638 struct inpcb * 639 in6_pcblookup_local(struct inpcbporthead *porthash, 640 const struct in6_addr *laddr, u_int lport_arg, int wild_okay, 641 struct ucred *cred) 642 { 643 struct inpcb *inp; 644 int matchwild = 3, wildcard; 645 u_short lport = lport_arg; 646 struct inpcbport *phd; 647 struct inpcb *match = NULL; 648 649 /* 650 * If the porthashbase is shared across several cpus, it must 651 * have been locked. 652 */ 653 ASSERT_PORTHASH_TOKEN_HELD(porthash); 654 655 /* 656 * Best fit PCB lookup. 657 * 658 * First see if this local port is in use by looking on the 659 * port hash list. 660 */ 661 LIST_FOREACH(phd, porthash, phd_hash) { 662 if (phd->phd_port == lport) 663 break; 664 } 665 666 if (phd != NULL) { 667 /* 668 * Port is in use by one or more PCBs. Look for best 669 * fit. 670 */ 671 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 672 wildcard = 0; 673 if (!INP_ISIPV6(inp)) 674 continue; 675 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) 676 wildcard++; 677 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 678 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 679 wildcard++; 680 else if (!IN6_ARE_ADDR_EQUAL( 681 &inp->in6p_laddr, laddr)) 682 continue; 683 } else { 684 if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) 685 wildcard++; 686 } 687 if (wildcard && !wild_okay) 688 continue; 689 if (wildcard < matchwild && 690 (cred == NULL || 691 cred->cr_prison == 692 inp->inp_socket->so_cred->cr_prison)) { 693 match = inp; 694 matchwild = wildcard; 695 if (wildcard == 0) 696 break; 697 else 698 matchwild = wildcard; 699 } 700 } 701 } 702 return (match); 703 } 704 705 void 706 in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) 707 { 708 struct in6pcb *in6p, *marker; 709 struct ip6_moptions *im6o; 710 struct in6_multi_mship *imm, *nimm; 711 712 /* 713 * We only need to make sure that we are in netisr0, where all 714 * multicast operation happen. We could check inpcbinfo which 715 * does not belong to netisr0 by holding the inpcbinfo's token. 716 * In this case, the pcbinfo must be able to be shared, i.e. 717 * pcbinfo->infotoken is not NULL. 718 */ 719 ASSERT_IN_NETISR(0); 720 KASSERT(pcbinfo->cpu == 0 || pcbinfo->infotoken != NULL, 721 ("pcbinfo could not be shared")); 722 723 /* 724 * Get a marker for the current netisr (netisr0). 725 * 726 * It is possible that the multicast address deletion blocks, 727 * which could cause temporary token releasing. So we use 728 * inpcb marker here to get a coherent view of the inpcb list. 729 * 730 * While, on the other hand, moptions are only added and deleted 731 * in netisr0, so we would not see staled moption or miss moption 732 * even if the token was released due to the blocking multicast 733 * address deletion. 734 */ 735 marker = in_pcbmarker(mycpuid); 736 737 GET_PCBINFO_TOKEN(pcbinfo); 738 739 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 740 while ((in6p = LIST_NEXT(marker, inp_list)) != NULL) { 741 LIST_REMOVE(marker, inp_list); 742 LIST_INSERT_AFTER(in6p, marker, inp_list); 743 744 if (in6p->in6p_flags & INP_PLACEMARKER) 745 continue; 746 im6o = in6p->in6p_moptions; 747 if (INP_ISIPV6(in6p) && im6o) { 748 /* 749 * Unselect the outgoing interface if it is being 750 * detached. 751 */ 752 if (im6o->im6o_multicast_ifp == ifp) 753 im6o->im6o_multicast_ifp = NULL; 754 755 /* 756 * Drop multicast group membership if we joined 757 * through the interface being detached. 758 * XXX controversial - is it really legal for kernel 759 * to force this? 760 */ 761 for (imm = im6o->im6o_memberships.lh_first; 762 imm != NULL; imm = nimm) { 763 nimm = imm->i6mm_chain.le_next; 764 if (imm->i6mm_maddr->in6m_ifp == ifp) { 765 LIST_REMOVE(imm, i6mm_chain); 766 in6_delmulti(imm->i6mm_maddr); 767 kfree(imm, M_IPMADDR); 768 } 769 } 770 } 771 } 772 LIST_REMOVE(marker, inp_list); 773 774 REL_PCBINFO_TOKEN(pcbinfo); 775 } 776 777 /* 778 * Check for alternatives when higher level complains 779 * about service problems. For now, invalidate cached 780 * routing information. If the route was created dynamically 781 * (by a redirect), time to try a default gateway again. 782 */ 783 void 784 in6_losing(struct inpcb *in6p) 785 { 786 struct rtentry *rt; 787 struct rt_addrinfo info; 788 789 if ((rt = in6p->in6p_route.ro_rt) != NULL) { 790 bzero((caddr_t)&info, sizeof(info)); 791 info.rti_flags = rt->rt_flags; 792 info.rti_info[RTAX_DST] = rt_key(rt); 793 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 794 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 795 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 796 if (rt->rt_flags & RTF_DYNAMIC) { 797 rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, 798 rt_mask(rt), rt->rt_flags, NULL); 799 } 800 in6p->in6p_route.ro_rt = NULL; 801 rtfree(rt); 802 /* 803 * A new route can be allocated 804 * the next time output is attempted. 805 */ 806 } 807 } 808 809 /* 810 * After a routing change, flush old routing 811 * and allocate a (hopefully) better one. 812 */ 813 void 814 in6_rtchange(struct inpcb *inp, int error) 815 { 816 if (inp->in6p_route.ro_rt) { 817 rtfree(inp->in6p_route.ro_rt); 818 inp->in6p_route.ro_rt = 0; 819 /* 820 * A new route can be allocated the next time 821 * output is attempted. 822 */ 823 } 824 } 825 826 /* 827 * Lookup PCB in hash list. 828 */ 829 struct inpcb * 830 in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, 831 u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, 832 int wildcard, struct ifnet *ifp) 833 { 834 struct inpcbhead *head; 835 struct inpcb *inp; 836 struct inpcb *jinp = NULL; 837 u_short fport = fport_arg, lport = lport_arg; 838 int faith; 839 840 if (faithprefix_p != NULL) 841 faith = (*faithprefix_p)(laddr); 842 else 843 faith = 0; 844 845 /* 846 * First look for an exact match. 847 */ 848 head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr->s6_addr32[3] /* XXX */, 849 fport, 850 laddr->s6_addr32[3], /* XXX JH */ 851 lport, 852 pcbinfo->hashmask)]; 853 LIST_FOREACH(inp, head, inp_hash) { 854 if (!INP_ISIPV6(inp)) 855 continue; 856 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && 857 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && 858 inp->inp_fport == fport && 859 inp->inp_lport == lport) { 860 /* 861 * Found. 862 */ 863 if (inp->inp_socket == NULL || 864 inp->inp_socket->so_cred->cr_prison == NULL) { 865 return (inp); 866 } else { 867 if (jinp == NULL) 868 jinp = inp; 869 } 870 } 871 } 872 if (jinp != NULL) 873 return(jinp); 874 875 if (wildcard) { 876 struct inpcontainerhead *chead; 877 struct inpcontainer *ic; 878 struct inpcb *local_wild = NULL; 879 struct inpcb *jinp_wild = NULL; 880 struct sockaddr_in6 jsin6; 881 struct ucred *cred; 882 883 /* 884 * Order of socket selection: 885 * 1. non-jailed, non-wild. 886 * 2. non-jailed, wild. 887 * 3. jailed, non-wild. 888 * 4. jailed, wild. 889 */ 890 jsin6.sin6_family = AF_INET6; 891 chead = &pcbinfo->wildcardhashbase[INP_PCBWILDCARDHASH(lport, 892 pcbinfo->wildcardhashmask)]; 893 894 GET_PCBINFO_TOKEN(pcbinfo); 895 LIST_FOREACH(ic, chead, ic_list) { 896 inp = ic->ic_inp; 897 if (inp->inp_flags & INP_PLACEMARKER) 898 continue; 899 900 if (!INP_ISIPV6(inp)) 901 continue; 902 if (inp->inp_socket != NULL) 903 cred = inp->inp_socket->so_cred; 904 else 905 cred = NULL; 906 907 if (cred != NULL && jailed(cred)) { 908 if (jinp != NULL) { 909 continue; 910 } else { 911 jsin6.sin6_addr = *laddr; 912 if (!jailed_ip(cred->cr_prison, 913 (struct sockaddr *)&jsin6)) 914 continue; 915 } 916 } 917 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && 918 inp->inp_lport == lport) { 919 if (faith && (inp->inp_flags & INP_FAITH) == 0) 920 continue; 921 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, 922 laddr)) { 923 if (cred != NULL && jailed(cred)) { 924 jinp = inp; 925 } else { 926 REL_PCBINFO_TOKEN(pcbinfo); 927 return (inp); 928 } 929 } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 930 if (cred != NULL && jailed(cred)) 931 jinp_wild = inp; 932 else 933 local_wild = inp; 934 } 935 } 936 } 937 REL_PCBINFO_TOKEN(pcbinfo); 938 939 if (local_wild != NULL) 940 return (local_wild); 941 if (jinp != NULL) 942 return (jinp); 943 return (jinp_wild); 944 } 945 946 /* 947 * Not found. 948 */ 949 return (NULL); 950 } 951 952 void 953 init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) 954 { 955 struct ip6_hdr *ip; 956 957 ip = mtod(m, struct ip6_hdr *); 958 bzero(sin6, sizeof(*sin6)); 959 sin6->sin6_len = sizeof(*sin6); 960 sin6->sin6_family = AF_INET6; 961 sin6->sin6_addr = ip->ip6_src; 962 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 963 sin6->sin6_addr.s6_addr16[1] = 0; 964 sin6->sin6_scope_id = 965 (m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 966 ? m->m_pkthdr.rcvif->if_index : 0; 967 968 return; 969 } 970 971 void 972 in6_savefaddr(struct socket *so, const struct sockaddr *faddr) 973 { 974 struct sockaddr_in6 *sin6; 975 976 KASSERT(faddr->sa_family == AF_INET6, 977 ("not AF_INET6 faddr %d", faddr->sa_family)); 978 979 sin6 = kmalloc(sizeof(*sin6), M_SONAME, M_WAITOK | M_ZERO); 980 sin6->sin6_family = AF_INET6; 981 sin6->sin6_len = sizeof(*sin6); 982 983 sin6->sin6_port = ((const struct sockaddr_in6 *)faddr)->sin6_port; 984 sin6->sin6_addr = ((const struct sockaddr_in6 *)faddr)->sin6_addr; 985 986 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 987 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); 988 else 989 sin6->sin6_scope_id = 0; /*XXX*/ 990 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 991 sin6->sin6_addr.s6_addr16[1] = 0; 992 993 so->so_faddr = (struct sockaddr *)sin6; 994 } 995