/* $FreeBSD: src/sys/netinet6/in6_pcb.c,v 1.10.2.9 2003/01/24 05:11:35 sam Exp $ */
/* $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ */

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
61 * 62 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 63 */ 64 65 #include "opt_inet.h" 66 #include "opt_inet6.h" 67 #include "opt_ipsec.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/malloc.h> 72 #include <sys/mbuf.h> 73 #include <sys/domain.h> 74 #include <sys/protosw.h> 75 #include <sys/socket.h> 76 #include <sys/socketvar.h> 77 #include <sys/sockio.h> 78 #include <sys/errno.h> 79 #include <sys/time.h> 80 #include <sys/proc.h> 81 #include <sys/priv.h> 82 #include <sys/jail.h> 83 84 #include <sys/thread2.h> 85 #include <sys/msgport2.h> 86 87 #include <vm/vm_zone.h> 88 89 #include <net/if.h> 90 #include <net/if_types.h> 91 #include <net/route.h> 92 #include <net/netisr2.h> 93 94 #include <netinet/in.h> 95 #include <netinet/in_var.h> 96 #include <netinet/in_systm.h> 97 #include <netinet/ip6.h> 98 #include <netinet/ip_var.h> 99 #include <netinet6/ip6_var.h> 100 #include <netinet6/nd6.h> 101 #include <netinet/in_pcb.h> 102 #include <netinet6/in6_pcb.h> 103 104 #ifdef IPSEC 105 #include <netinet6/ipsec.h> 106 #ifdef INET6 107 #include <netinet6/ipsec6.h> 108 #endif 109 #include <netinet6/ah.h> 110 #ifdef INET6 111 #include <netinet6/ah6.h> 112 #endif 113 #include <netproto/key/key.h> 114 #endif /* IPSEC */ 115 116 #ifdef FAST_IPSEC 117 #include <netproto/ipsec/ipsec.h> 118 #include <netproto/ipsec/ipsec6.h> 119 #include <netproto/ipsec/key.h> 120 #define IPSEC 121 #endif /* FAST_IPSEC */ 122 123 struct in6_addr zeroin6_addr; 124 125 int 126 in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 127 { 128 struct socket *so = inp->inp_socket; 129 struct sockaddr_in6 jsin6; 130 int error; 131 132 if (!in6_ifaddr) /* XXX broken! 
*/ 133 return (EADDRNOTAVAIL); 134 if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 135 return (EINVAL); 136 137 if (nam) { 138 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 139 struct inpcbinfo *pcbinfo; 140 struct inpcbportinfo *portinfo; 141 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 142 struct ucred *cred = NULL; 143 struct inpcb *t; 144 u_short lport, lport_ho; 145 146 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 147 wild = 1; 148 if (td->td_proc != NULL) 149 cred = td->td_proc->p_ucred; 150 151 if (nam->sa_len != sizeof(*sin6)) 152 return (EINVAL); 153 /* 154 * family check. 155 */ 156 if (nam->sa_family != AF_INET6) 157 return (EAFNOSUPPORT); 158 159 /* Reject v4-mapped address */ 160 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 161 return (EADDRNOTAVAIL); 162 163 if (!prison_replace_wildcards(td, nam)) 164 return (EINVAL); 165 166 /* KAME hack: embed scopeid */ 167 if (in6_embedscope(&sin6->sin6_addr, sin6, inp, NULL) != 0) 168 return (EINVAL); 169 /* this must be cleared for ifa_ifwithaddr() */ 170 sin6->sin6_scope_id = 0; 171 172 lport = sin6->sin6_port; 173 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 174 /* 175 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 176 * allow compepte duplication of binding if 177 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 178 * and a multicast address is bound on both 179 * new and duplicated sockets. 180 */ 181 if (so->so_options & SO_REUSEADDR) 182 reuseport = SO_REUSEADDR|SO_REUSEPORT; 183 } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 184 struct ifaddr *ia = NULL; 185 186 sin6->sin6_port = 0; /* yech... */ 187 if (!prison_replace_wildcards(td, (struct sockaddr *)sin6)) { 188 sin6->sin6_addr = kin6addr_any; 189 return (EINVAL); 190 } 191 if ((ia = ifa_ifwithaddr((struct sockaddr *)sin6)) == NULL) 192 return (EADDRNOTAVAIL); 193 194 /* 195 * XXX: bind to an anycast address might accidentally 196 * cause sending a packet with anycast source address. 
197 * We should allow to bind to a deprecated address, since 198 * the application dares to use it. 199 */ 200 if (ia && 201 ((struct in6_ifaddr *)ia)->ia6_flags & 202 (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) 203 return (EADDRNOTAVAIL); 204 } 205 206 inp->in6p_laddr = sin6->sin6_addr; 207 208 if (lport == 0) 209 goto auto_select; 210 lport_ho = ntohs(lport); 211 212 /* GROSS */ 213 if (lport_ho < IPV6PORT_RESERVED && cred && 214 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0)) { 215 inp->in6p_laddr = kin6addr_any; 216 return (EACCES); 217 } 218 219 /* 220 * Locate the proper portinfo based on lport 221 */ 222 pcbinfo = inp->inp_pcbinfo; 223 portinfo = 224 &pcbinfo->portinfo[lport_ho & pcbinfo->portinfo_mask]; 225 KKASSERT((lport_ho & pcbinfo->portinfo_mask) == 226 portinfo->offset); 227 228 /* 229 * This has to be atomic. If the porthash is shared across 230 * multiple protocol threads (aka tcp) then the token must 231 * be held. 232 */ 233 GET_PORT_TOKEN(portinfo); 234 235 if (so->so_cred->cr_uid != 0 && 236 !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 237 t = in6_pcblookup_local(portinfo, 238 &sin6->sin6_addr, lport, INPLOOKUP_WILDCARD, cred); 239 if (t && 240 (so->so_cred->cr_uid != 241 t->inp_socket->so_cred->cr_uid)) { 242 inp->in6p_laddr = kin6addr_any; 243 error = EADDRINUSE; 244 goto done; 245 } 246 } 247 if (cred && cred->cr_prison && 248 !prison_replace_wildcards(td, nam)) { 249 inp->in6p_laddr = kin6addr_any; 250 error = EADDRNOTAVAIL; 251 goto done; 252 } 253 t = in6_pcblookup_local(portinfo, &sin6->sin6_addr, lport, 254 wild, cred); 255 if (t && (reuseport & t->inp_socket->so_options) == 0) { 256 inp->in6p_laddr = kin6addr_any; 257 error = EADDRINUSE; 258 goto done; 259 } 260 261 inp->inp_lport = lport; 262 in_pcbinsporthash(portinfo, inp); 263 error = 0; 264 done: 265 REL_PORT_TOKEN(portinfo); 266 return (error); 267 } else { 268 auto_select: 269 jsin6.sin6_addr = inp->in6p_laddr; 270 jsin6.sin6_family = AF_INET6; 271 if 
(!prison_replace_wildcards(td, (struct sockaddr*)&jsin6)) { 272 inp->in6p_laddr = kin6addr_any; 273 inp->inp_lport = 0; 274 return (EINVAL); 275 } 276 277 return in6_pcbsetlport(&inp->in6p_laddr, inp, td); 278 } 279 } 280 281 /* 282 * Transform old in6_pcbconnect() into an inner subroutine for new 283 * in6_pcbconnect(): Do some validity-checking on the remote 284 * address (in mbuf 'nam') and then determine local host address 285 * (i.e., which interface) to use to access that remote host. 286 * 287 * This preserves definition of in6_pcbconnect(), while supporting a 288 * slightly different version for T/TCP. (This is more than 289 * a bit of a kludge, but cleaning up the internal interfaces would 290 * have forced minor changes in every protocol). 291 */ 292 293 int 294 in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam, 295 struct in6_addr **plocal_addr6, struct thread *td) 296 { 297 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 298 struct ifnet *ifp = NULL; 299 int error = 0; 300 301 if (nam->sa_len != sizeof (*sin6)) 302 return (EINVAL); 303 if (sin6->sin6_family != AF_INET6) 304 return (EAFNOSUPPORT); 305 if (sin6->sin6_port == 0) 306 return (EADDRNOTAVAIL); 307 308 /* KAME hack: embed scopeid */ 309 if (in6_embedscope(&sin6->sin6_addr, sin6, inp, &ifp) != 0) 310 return EINVAL; 311 312 if (in6_ifaddr) { 313 /* 314 * If the destination address is UNSPECIFIED addr, 315 * use the loopback addr, e.g ::1. 316 */ 317 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 318 sin6->sin6_addr = kin6addr_loopback; 319 } 320 { 321 /* 322 * XXX: in6_selectsrc might replace the bound local address 323 * with the address specified by setsockopt(IPV6_PKTINFO). 324 * Is it the intended behavior? 
325 */ 326 *plocal_addr6 = in6_selectsrc(sin6, inp->in6p_outputopts, 327 inp->in6p_moptions, 328 &inp->in6p_route, 329 &inp->in6p_laddr, &error, td); 330 if (*plocal_addr6 == NULL) { 331 if (error == 0) 332 error = EADDRNOTAVAIL; 333 return (error); 334 } 335 /* 336 * Don't do pcblookup call here; return interface in 337 * plocal_addr6 338 * and exit to caller, that will do the lookup. 339 */ 340 } 341 342 if (inp->in6p_route.ro_rt) 343 ifp = inp->in6p_route.ro_rt->rt_ifp; 344 345 return (0); 346 } 347 348 /* 349 * Outer subroutine: 350 * Connect from a socket to a specified address. 351 * Both address and port must be specified in argument sin. 352 * If don't have a local address for this socket yet, 353 * then pick one. 354 */ 355 int 356 in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct thread *td) 357 { 358 struct in6_addr *addr6; 359 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 360 int error; 361 362 /* Reject v4-mapped address */ 363 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 364 return EADDRNOTAVAIL; 365 366 /* 367 * Call inner routine, to assign local interface address. 368 * in6_pcbladdr() may automatically fill in sin6_scope_id. 369 */ 370 if ((error = in6_pcbladdr(inp, nam, &addr6, td)) != 0) 371 return (error); 372 373 if (in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr, 374 sin6->sin6_port, 375 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 376 ? 
addr6 : &inp->in6p_laddr, 377 inp->inp_lport, 0, NULL) != NULL) { 378 return (EADDRINUSE); 379 } 380 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 381 if (inp->inp_lport == 0) { 382 error = in6_pcbbind(inp, NULL, td); 383 if (error) 384 return (error); 385 } 386 inp->in6p_laddr = *addr6; 387 } 388 inp->in6p_faddr = sin6->sin6_addr; 389 inp->inp_fport = sin6->sin6_port; 390 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 391 inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 392 if (inp->in6p_flags & IN6P_AUTOFLOWLABEL) 393 inp->in6p_flowinfo |= 394 (htonl(ip6_flow_seq++) & IPV6_FLOWLABEL_MASK); 395 396 in_pcbinsconnhash(inp); 397 return (0); 398 } 399 400 void 401 in6_pcbdisconnect(struct inpcb *inp) 402 { 403 bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); 404 inp->inp_fport = 0; 405 /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 406 inp->in6p_flowinfo &= ~IPV6_FLOWLABEL_MASK; 407 in_pcbremconnhash(inp); 408 if (inp->inp_socket->so_state & SS_NOFDREF) 409 in6_pcbdetach(inp); 410 } 411 412 void 413 in6_pcbdetach(struct inpcb *inp) 414 { 415 struct socket *so = inp->inp_socket; 416 struct inpcbinfo *ipi = inp->inp_pcbinfo; 417 418 #ifdef IPSEC 419 if (inp->in6p_sp != NULL) 420 ipsec6_delete_pcbpolicy(inp); 421 #endif /* IPSEC */ 422 inp->inp_gencnt = ++ipi->ipi_gencnt; 423 in_pcbremlists(inp); 424 so->so_pcb = NULL; 425 KKASSERT((so->so_state & SS_ASSERTINPROG) == 0); 426 sofree(so); /* remove pcb ref */ 427 428 if (inp->in6p_options) 429 m_freem(inp->in6p_options); 430 ip6_freepcbopts(inp->in6p_outputopts); 431 ip6_freemoptions(inp->in6p_moptions); 432 if (inp->in6p_route.ro_rt) 433 rtfree(inp->in6p_route.ro_rt); 434 /* Check and free IPv4 related resources in case of mapped addr */ 435 if (inp->inp_options) 436 m_free(inp->inp_options); 437 ip_freemoptions(inp->inp_moptions); 438 439 kfree(inp, M_PCB); 440 } 441 442 /* 443 * The calling convention of in6_setsockaddr() and in6_setpeeraddr() was 444 * modified to match the pru_sockaddr() 
and pru_peeraddr() entry points 445 * in struct pr_usrreqs, so that protocols can just reference then directly 446 * without the need for a wrapper function. The socket must have a valid 447 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 448 * except through a kernel programming error, so it is acceptable to panic 449 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 450 * because there actually /is/ a programming error somewhere... XXX) 451 */ 452 void 453 in6_setsockaddr_dispatch(netmsg_t msg) 454 { 455 int error; 456 457 error = in6_setsockaddr(msg->sockaddr.base.nm_so, msg->sockaddr.nm_nam); 458 lwkt_replymsg(&msg->sockaddr.base.lmsg, error); 459 } 460 461 int 462 in6_setsockaddr(struct socket *so, struct sockaddr **nam) 463 { 464 struct inpcb *inp; 465 struct sockaddr_in6 *sin6; 466 467 /* 468 * Do the malloc first in case it blocks. 469 */ 470 sin6 = kmalloc(sizeof *sin6, M_SONAME, M_WAITOK | M_ZERO); 471 sin6->sin6_family = AF_INET6; 472 sin6->sin6_len = sizeof(*sin6); 473 474 crit_enter(); 475 inp = so->so_pcb; 476 if (!inp) { 477 crit_exit(); 478 kfree(sin6, M_SONAME); 479 return EINVAL; 480 } 481 sin6->sin6_port = inp->inp_lport; 482 sin6->sin6_addr = inp->in6p_laddr; 483 crit_exit(); 484 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 485 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); 486 else 487 sin6->sin6_scope_id = 0; /*XXX*/ 488 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 489 sin6->sin6_addr.s6_addr16[1] = 0; 490 491 *nam = (struct sockaddr *)sin6; 492 return 0; 493 } 494 495 void 496 in6_setpeeraddr_dispatch(netmsg_t msg) 497 { 498 int error; 499 500 error = in6_setpeeraddr(msg->peeraddr.base.nm_so, msg->peeraddr.nm_nam); 501 lwkt_replymsg(&msg->peeraddr.base.lmsg, error); 502 } 503 504 int 505 in6_setpeeraddr(struct socket *so, struct sockaddr **nam) 506 { 507 struct inpcb *inp; 508 struct sockaddr_in6 *sin6; 509 510 /* 511 * Do the malloc first in case it blocks. 
512 */ 513 sin6 = kmalloc(sizeof(*sin6), M_SONAME, M_WAITOK | M_ZERO); 514 sin6->sin6_family = AF_INET6; 515 sin6->sin6_len = sizeof(struct sockaddr_in6); 516 517 crit_enter(); 518 inp = so->so_pcb; 519 if (!inp) { 520 crit_exit(); 521 kfree(sin6, M_SONAME); 522 return EINVAL; 523 } 524 sin6->sin6_port = inp->inp_fport; 525 sin6->sin6_addr = inp->in6p_faddr; 526 crit_exit(); 527 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 528 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); 529 else 530 sin6->sin6_scope_id = 0; /*XXX*/ 531 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 532 sin6->sin6_addr.s6_addr16[1] = 0; 533 534 *nam = (struct sockaddr *)sin6; 535 return 0; 536 } 537 538 /* 539 * Pass some notification to all connections of a protocol 540 * associated with address dst. The local address and/or port numbers 541 * may be specified to limit the search. The "usual action" will be 542 * taken, depending on the ctlinput cmd. The caller must filter any 543 * cmds that are uninteresting (e.g., no error in the map). 544 * Call the protocol specific routine (if any) to report 545 * any errors for each matching socket. 546 */ 547 void 548 in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, in_port_t fport, 549 const struct sockaddr *src, in_port_t lport, int cmd, int arg, 550 inp_notify_t notify) 551 { 552 struct inpcb *inp, *marker; 553 struct sockaddr_in6 sa6_src, *sa6_dst; 554 u_int32_t flowinfo; 555 556 if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) 557 return; 558 559 sa6_dst = (struct sockaddr_in6 *)dst; 560 if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) 561 return; 562 563 /* 564 * note that src can be NULL when we get notify by local fragmentation. 565 */ 566 sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; 567 flowinfo = sa6_src.sin6_flowinfo; 568 569 /* 570 * Redirects go to all references to the destination, 571 * and use in6_rtchange to invalidate the route cache. 
572 * Dead host indications: also use in6_rtchange to invalidate 573 * the cache, and deliver the error to all the sockets. 574 * Otherwise, if we have knowledge of the local port and address, 575 * deliver only to that socket. 576 */ 577 if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { 578 fport = 0; 579 lport = 0; 580 bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); 581 582 if (cmd != PRC_HOSTDEAD) 583 notify = in6_rtchange; 584 } 585 if (cmd != PRC_MSGSIZE) 586 arg = inet6ctlerrmap[cmd]; 587 588 marker = in_pcbmarker(mycpuid); 589 590 GET_PCBINFO_TOKEN(pcbinfo); 591 592 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 593 while ((inp = LIST_NEXT(marker, inp_list)) != NULL) { 594 LIST_REMOVE(marker, inp_list); 595 LIST_INSERT_AFTER(inp, marker, inp_list); 596 597 if (inp->inp_flags & INP_PLACEMARKER) 598 continue; 599 600 if (!INP_ISIPV6(inp)) 601 continue; 602 /* 603 * If the error designates a new path MTU for a destination 604 * and the application (associated with this socket) wanted to 605 * know the value, notify. Note that we notify for all 606 * disconnected sockets if the corresponding application 607 * wanted. This is because some UDP applications keep sending 608 * sockets disconnected. 609 * XXX: should we avoid to notify the value to TCP sockets? 610 */ 611 if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 && 612 (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || 613 IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) { 614 ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst, &arg); 615 } 616 617 /* 618 * Detect if we should notify the error. If no source and 619 * destination ports are specifed, but non-zero flowinfo and 620 * local address match, notify the error. This is the case 621 * when the error is delivered with an encrypted buffer 622 * by ESP. Otherwise, just compare addresses and ports 623 * as usual. 
624 */ 625 if (lport == 0 && fport == 0 && flowinfo && 626 inp->inp_socket != NULL && 627 flowinfo == (inp->in6p_flowinfo & IPV6_FLOWLABEL_MASK) && 628 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) 629 goto do_notify; 630 else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, 631 &sa6_dst->sin6_addr) || 632 inp->inp_socket == 0 || 633 (lport && inp->inp_lport != lport) || 634 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && 635 !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, 636 &sa6_src.sin6_addr)) || 637 (fport && inp->inp_fport != fport)) 638 continue; 639 640 do_notify: 641 if (notify) 642 (*notify)(inp, arg); 643 } 644 LIST_REMOVE(marker, inp_list); 645 646 REL_PCBINFO_TOKEN(pcbinfo); 647 } 648 649 /* 650 * Lookup a PCB based on the local address and port. 651 */ 652 struct inpcb * 653 in6_pcblookup_local(struct inpcbportinfo *portinfo, 654 const struct in6_addr *laddr, u_int lport_arg, int wild_okay, 655 struct ucred *cred) 656 { 657 struct inpcb *inp; 658 int matchwild = 3, wildcard; 659 u_short lport = lport_arg; 660 struct inpcbporthead *porthash; 661 struct inpcbport *phd; 662 struct inpcb *match = NULL; 663 664 /* 665 * If the porthashbase is shared across several cpus, it must 666 * have been locked. 667 */ 668 ASSERT_PORT_TOKEN_HELD(portinfo); 669 670 /* 671 * Best fit PCB lookup. 672 * 673 * First see if this local port is in use by looking on the 674 * port hash list. 675 */ 676 porthash = &portinfo->porthashbase[ 677 INP_PCBPORTHASH(lport, portinfo->porthashmask)]; 678 LIST_FOREACH(phd, porthash, phd_hash) { 679 if (phd->phd_port == lport) 680 break; 681 } 682 683 if (phd != NULL) { 684 /* 685 * Port is in use by one or more PCBs. Look for best 686 * fit. 
687 */ 688 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 689 wildcard = 0; 690 if (!INP_ISIPV6(inp)) 691 continue; 692 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) 693 wildcard++; 694 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 695 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 696 wildcard++; 697 else if (!IN6_ARE_ADDR_EQUAL( 698 &inp->in6p_laddr, laddr)) 699 continue; 700 } else { 701 if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) 702 wildcard++; 703 } 704 if (wildcard && !wild_okay) 705 continue; 706 if (wildcard < matchwild && 707 (cred == NULL || 708 cred->cr_prison == 709 inp->inp_socket->so_cred->cr_prison)) { 710 match = inp; 711 matchwild = wildcard; 712 if (wildcard == 0) 713 break; 714 else 715 matchwild = wildcard; 716 } 717 } 718 } 719 return (match); 720 } 721 722 void 723 in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) 724 { 725 struct in6pcb *in6p, *marker; 726 struct ip6_moptions *im6o; 727 struct in6_multi_mship *imm, *nimm; 728 729 /* 730 * We only need to make sure that we are in netisr0, where all 731 * multicast operation happen. We could check inpcbinfo which 732 * does not belong to netisr0 by holding the inpcbinfo's token. 733 * In this case, the pcbinfo must be able to be shared, i.e. 734 * pcbinfo->infotoken is not NULL. 735 */ 736 ASSERT_IN_NETISR(0); 737 KASSERT(pcbinfo->cpu == 0 || pcbinfo->infotoken != NULL, 738 ("pcbinfo could not be shared")); 739 740 /* 741 * Get a marker for the current netisr (netisr0). 742 * 743 * It is possible that the multicast address deletion blocks, 744 * which could cause temporary token releasing. So we use 745 * inpcb marker here to get a coherent view of the inpcb list. 746 * 747 * While, on the other hand, moptions are only added and deleted 748 * in netisr0, so we would not see staled moption or miss moption 749 * even if the token was released due to the blocking multicast 750 * address deletion. 
751 */ 752 marker = in_pcbmarker(mycpuid); 753 754 GET_PCBINFO_TOKEN(pcbinfo); 755 756 LIST_INSERT_HEAD(&pcbinfo->pcblisthead, marker, inp_list); 757 while ((in6p = LIST_NEXT(marker, inp_list)) != NULL) { 758 LIST_REMOVE(marker, inp_list); 759 LIST_INSERT_AFTER(in6p, marker, inp_list); 760 761 if (in6p->in6p_flags & INP_PLACEMARKER) 762 continue; 763 im6o = in6p->in6p_moptions; 764 if (INP_ISIPV6(in6p) && im6o) { 765 /* 766 * Unselect the outgoing interface if it is being 767 * detached. 768 */ 769 if (im6o->im6o_multicast_ifp == ifp) 770 im6o->im6o_multicast_ifp = NULL; 771 772 /* 773 * Drop multicast group membership if we joined 774 * through the interface being detached. 775 * XXX controversial - is it really legal for kernel 776 * to force this? 777 */ 778 for (imm = im6o->im6o_memberships.lh_first; 779 imm != NULL; imm = nimm) { 780 nimm = imm->i6mm_chain.le_next; 781 if (imm->i6mm_maddr->in6m_ifp == ifp) { 782 LIST_REMOVE(imm, i6mm_chain); 783 in6_delmulti(imm->i6mm_maddr); 784 kfree(imm, M_IPMADDR); 785 } 786 } 787 } 788 } 789 LIST_REMOVE(marker, inp_list); 790 791 REL_PCBINFO_TOKEN(pcbinfo); 792 } 793 794 /* 795 * Check for alternatives when higher level complains 796 * about service problems. For now, invalidate cached 797 * routing information. If the route was created dynamically 798 * (by a redirect), time to try a default gateway again. 
799 */ 800 void 801 in6_losing(struct inpcb *in6p) 802 { 803 struct rtentry *rt; 804 struct rt_addrinfo info; 805 806 if ((rt = in6p->in6p_route.ro_rt) != NULL) { 807 bzero((caddr_t)&info, sizeof(info)); 808 info.rti_flags = rt->rt_flags; 809 info.rti_info[RTAX_DST] = rt_key(rt); 810 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 811 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 812 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 813 if (rt->rt_flags & RTF_DYNAMIC) { 814 rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, 815 rt_mask(rt), rt->rt_flags, NULL); 816 } 817 in6p->in6p_route.ro_rt = NULL; 818 rtfree(rt); 819 /* 820 * A new route can be allocated 821 * the next time output is attempted. 822 */ 823 } 824 } 825 826 /* 827 * After a routing change, flush old routing 828 * and allocate a (hopefully) better one. 829 */ 830 void 831 in6_rtchange(struct inpcb *inp, int error) 832 { 833 if (inp->in6p_route.ro_rt) { 834 rtfree(inp->in6p_route.ro_rt); 835 inp->in6p_route.ro_rt = 0; 836 /* 837 * A new route can be allocated the next time 838 * output is attempted. 839 */ 840 } 841 } 842 843 /* 844 * Lookup PCB in hash list. 845 */ 846 struct inpcb * 847 in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, 848 u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, 849 int wildcard, struct ifnet *ifp) 850 { 851 struct inpcbhead *head; 852 struct inpcb *inp; 853 struct inpcb *jinp = NULL; 854 u_short fport = fport_arg, lport = lport_arg; 855 int faith; 856 857 if (faithprefix_p != NULL) 858 faith = (*faithprefix_p)(laddr); 859 else 860 faith = 0; 861 862 /* 863 * First look for an exact match. 
864 */ 865 head = &pcbinfo->hashbase[INP_PCBCONNHASH(faddr->s6_addr32[3] /* XXX */, 866 fport, 867 laddr->s6_addr32[3], /* XXX JH */ 868 lport, 869 pcbinfo->hashmask)]; 870 LIST_FOREACH(inp, head, inp_hash) { 871 if (!INP_ISIPV6(inp)) 872 continue; 873 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && 874 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && 875 inp->inp_fport == fport && 876 inp->inp_lport == lport) { 877 /* 878 * Found. 879 */ 880 if (inp->inp_socket == NULL || 881 inp->inp_socket->so_cred->cr_prison == NULL) { 882 return (inp); 883 } else { 884 if (jinp == NULL) 885 jinp = inp; 886 } 887 } 888 } 889 if (jinp != NULL) 890 return(jinp); 891 892 if (wildcard) { 893 struct inpcontainerhead *chead; 894 struct inpcontainer *ic; 895 struct inpcb *local_wild = NULL; 896 struct inpcb *jinp_wild = NULL; 897 struct sockaddr_in6 jsin6; 898 struct ucred *cred; 899 900 /* 901 * Order of socket selection: 902 * 1. non-jailed, non-wild. 903 * 2. non-jailed, wild. 904 * 3. jailed, non-wild. 905 * 4. jailed, wild. 
906 */ 907 jsin6.sin6_family = AF_INET6; 908 chead = &pcbinfo->wildcardhashbase[INP_PCBWILDCARDHASH(lport, 909 pcbinfo->wildcardhashmask)]; 910 911 GET_PCBINFO_TOKEN(pcbinfo); 912 LIST_FOREACH(ic, chead, ic_list) { 913 inp = ic->ic_inp; 914 if (inp->inp_flags & INP_PLACEMARKER) 915 continue; 916 917 if (!INP_ISIPV6(inp)) 918 continue; 919 if (inp->inp_socket != NULL) 920 cred = inp->inp_socket->so_cred; 921 else 922 cred = NULL; 923 924 if (cred != NULL && jailed(cred)) { 925 if (jinp != NULL) { 926 continue; 927 } else { 928 jsin6.sin6_addr = *laddr; 929 if (!jailed_ip(cred->cr_prison, 930 (struct sockaddr *)&jsin6)) 931 continue; 932 } 933 } 934 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && 935 inp->inp_lport == lport) { 936 if (faith && (inp->inp_flags & INP_FAITH) == 0) 937 continue; 938 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, 939 laddr)) { 940 if (cred != NULL && jailed(cred)) { 941 jinp = inp; 942 } else { 943 REL_PCBINFO_TOKEN(pcbinfo); 944 return (inp); 945 } 946 } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 947 if (cred != NULL && jailed(cred)) 948 jinp_wild = inp; 949 else 950 local_wild = inp; 951 } 952 } 953 } 954 REL_PCBINFO_TOKEN(pcbinfo); 955 956 if (local_wild != NULL) 957 return (local_wild); 958 if (jinp != NULL) 959 return (jinp); 960 return (jinp_wild); 961 } 962 963 /* 964 * Not found. 965 */ 966 return (NULL); 967 } 968 969 void 970 init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) 971 { 972 struct ip6_hdr *ip; 973 974 ip = mtod(m, struct ip6_hdr *); 975 bzero(sin6, sizeof(*sin6)); 976 sin6->sin6_len = sizeof(*sin6); 977 sin6->sin6_family = AF_INET6; 978 sin6->sin6_addr = ip->ip6_src; 979 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 980 sin6->sin6_addr.s6_addr16[1] = 0; 981 sin6->sin6_scope_id = 982 (m->m_pkthdr.rcvif && IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 983 ? 
m->m_pkthdr.rcvif->if_index : 0; 984 985 return; 986 } 987 988 void 989 in6_savefaddr(struct socket *so, const struct sockaddr *faddr) 990 { 991 struct sockaddr_in6 *sin6; 992 993 KASSERT(faddr->sa_family == AF_INET6, 994 ("not AF_INET6 faddr %d", faddr->sa_family)); 995 996 sin6 = kmalloc(sizeof(*sin6), M_SONAME, M_WAITOK | M_ZERO); 997 sin6->sin6_family = AF_INET6; 998 sin6->sin6_len = sizeof(*sin6); 999 1000 sin6->sin6_port = ((const struct sockaddr_in6 *)faddr)->sin6_port; 1001 sin6->sin6_addr = ((const struct sockaddr_in6 *)faddr)->sin6_addr; 1002 1003 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 1004 sin6->sin6_scope_id = ntohs(sin6->sin6_addr.s6_addr16[1]); 1005 else 1006 sin6->sin6_scope_id = 0; /*XXX*/ 1007 if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) 1008 sin6->sin6_addr.s6_addr16[1] = 0; 1009 1010 so->so_faddr = (struct sockaddr *)sin6; 1011 } 1012