1 /* $OpenBSD: in6_pcb.c,v 1.138 2024/02/13 12:22:09 bluhm Exp $ */ 2 3 /* 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 34 * 35 * NRL grants permission for redistribution and use in source and binary 36 * forms, with or without modification, of the software and documentation 37 * created at NRL provided that the following conditions are met: 38 * 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgements: 46 * This product includes software developed by the University of 47 * California, Berkeley and its contributors. 48 * This product includes software developed at the Information 49 * Technology Division, US Naval Research Laboratory. 50 * 4. Neither the name of the NRL nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 55 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 56 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 57 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 58 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 59 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 61 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 62 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 63 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 64 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 * 66 * The views and conclusions contained in the software and documentation 67 * are those of the authors and should not be interpreted as representing 68 * official policies, either expressed or implied, of the US Naval 69 * Research Laboratory (NRL). 70 */ 71 72 /* 73 * Copyright (c) 1982, 1986, 1990, 1993, 1995 74 * Regents of the University of California. All rights reserved. 75 * 76 * Redistribution and use in source and binary forms, with or without 77 * modification, are permitted provided that the following conditions 78 * are met: 79 * 1. Redistributions of source code must retain the above copyright 80 * notice, this list of conditions and the following disclaimer. 81 * 2. Redistributions in binary form must reproduce the above copyright 82 * notice, this list of conditions and the following disclaimer in the 83 * documentation and/or other materials provided with the distribution. 84 * 3. Neither the name of the University nor the names of its contributors 85 * may be used to endorse or promote products derived from this software 86 * without specific prior written permission. 87 * 88 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 89 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 90 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 91 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 92 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 93 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 94 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 95 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 96 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 97 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 98 * SUCH DAMAGE. 99 * 100 */ 101 102 #include "pf.h" 103 #include "stoeplitz.h" 104 105 #include <sys/param.h> 106 #include <sys/systm.h> 107 #include <sys/mbuf.h> 108 #include <sys/protosw.h> 109 #include <sys/socket.h> 110 #include <sys/socketvar.h> 111 112 #include <net/if.h> 113 #include <net/if_var.h> 114 #include <net/pfvar.h> 115 116 #include <netinet/in.h> 117 #include <netinet6/in6_var.h> 118 #include <netinet/ip.h> 119 #include <netinet/ip_var.h> 120 #include <netinet6/ip6_var.h> 121 #include <netinet/in_pcb.h> 122 123 #if NSTOEPLITZ > 0 124 #include <net/toeplitz.h> 125 #endif 126 127 const struct in6_addr zeroin6_addr; 128 129 struct inpcb *in6_pcbhash_lookup(struct inpcbtable *, uint64_t, u_int, 130 const struct in6_addr *, u_short, const struct in6_addr *, u_short); 131 132 struct inpcb * in6_pcblookup_lock(struct inpcbtable *, const struct in6_addr *, 133 u_int, const struct in6_addr *, u_int, u_int, int); 134 135 uint64_t 136 in6_pcbhash(struct inpcbtable *table, u_int rdomain, 137 const struct in6_addr *faddr, u_short fport, 138 const struct in6_addr *laddr, u_short lport) 139 { 140 SIPHASH_CTX ctx; 141 u_int32_t nrdom = htonl(rdomain); 142 143 SipHash24_Init(&ctx, &table->inpt_key); 144 SipHash24_Update(&ctx, &nrdom, sizeof(nrdom)); 145 SipHash24_Update(&ctx, faddr, sizeof(*faddr)); 146 SipHash24_Update(&ctx, &fport, sizeof(fport)); 147 SipHash24_Update(&ctx, laddr, sizeof(*laddr)); 148 SipHash24_Update(&ctx, &lport, sizeof(lport)); 149 return SipHash24_End(&ctx); 150 } 151 152 int 153 in6_pcbaddrisavail_lock(const struct inpcb *inp, struct sockaddr_in6 *sin6, 154 int wild, struct proc *p, int lock) 155 { 156 struct socket *so = inp->inp_socket; 157 struct inpcbtable *table = inp->inp_table; 158 u_short lport = sin6->sin6_port; 159 int reuseport = (so->so_options & SO_REUSEPORT); 160 161 wild |= INPLOOKUP_IPV6; 162 /* KAME hack: embed scopeid */ 163 if (in6_embedscope(&sin6->sin6_addr, sin6, 164 inp->inp_outputopts6, inp->inp_moptions6) != 0) 165 return (EINVAL); 166 /* this must be cleared for ifa_ifwithaddr() */ 167 sin6->sin6_scope_id = 0; 168 /* reject IPv4 mapped address, we have no support for it */ 169 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 170 return (EADDRNOTAVAIL); 171 172 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 173 /* 174 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 175 * allow complete duplication of binding if 176 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 177 * and a multicast address is bound on both 178 * new and duplicated sockets. 179 */ 180 if (so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) 181 reuseport = SO_REUSEADDR | SO_REUSEPORT; 182 } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 183 struct ifaddr *ifa = NULL; 184 185 sin6->sin6_port = 0; /* 186 * Yechhhh, because of upcoming 187 * call to ifa_ifwithaddr(), which 188 * does bcmp's over the PORTS as 189 * well. (What about flow?) 190 */ 191 sin6->sin6_flowinfo = 0; 192 if (!(so->so_options & SO_BINDANY) && 193 (ifa = ifa_ifwithaddr(sin6tosa(sin6), 194 inp->inp_rtableid)) == NULL) 195 return (EADDRNOTAVAIL); 196 sin6->sin6_port = lport; 197 198 /* 199 * bind to an anycast address might accidentally 200 * cause sending a packet with an anycast source 201 * address, so we forbid it. 202 * 203 * We should allow to bind to a deprecated address, 204 * since the application dare to use it. 205 * But, can we assume that they are careful enough 206 * to check if the address is deprecated or not? 207 * Maybe, as a safeguard, we should have a setsockopt 208 * flag to control the bind(2) behavior against 209 * deprecated addresses (default: forbid bind(2)). 210 */ 211 if (ifa && ifatoia6(ifa)->ia6_flags & (IN6_IFF_ANYCAST| 212 IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED|IN6_IFF_DETACHED)) 213 return (EADDRNOTAVAIL); 214 } 215 if (lport) { 216 struct inpcb *t; 217 int error = 0; 218 219 if (so->so_euid && !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 220 t = in_pcblookup_local_lock(table, &sin6->sin6_addr, 221 lport, INPLOOKUP_WILDCARD | INPLOOKUP_IPV6, 222 inp->inp_rtableid, lock); 223 if (t && (so->so_euid != t->inp_socket->so_euid)) 224 error = EADDRINUSE; 225 if (lock == IN_PCBLOCK_GRAB) 226 in_pcbunref(t); 227 if (error) 228 return (error); 229 } 230 t = in_pcblookup_local_lock(table, &sin6->sin6_addr, lport, 231 wild, inp->inp_rtableid, lock); 232 if (t && (reuseport & t->inp_socket->so_options) == 0) 233 error = EADDRINUSE; 234 if (lock == IN_PCBLOCK_GRAB) 235 in_pcbunref(t); 236 if (error) 237 return (error); 238 } 239 return (0); 240 } 241 242 int 243 in6_pcbaddrisavail(const struct inpcb *inp, struct sockaddr_in6 *sin6, 244 int wild, struct proc *p) 245 { 246 return in6_pcbaddrisavail_lock(inp, sin6, wild, p, IN_PCBLOCK_GRAB); 247 } 248 249 /* 250 * Connect from a socket to a specified address. 251 * Both address and port must be specified in argument sin6. 252 * Eventually, flow labels will have to be dealt with here, as well. 253 * 254 * If don't have a local address for this socket yet, 255 * then pick one. 256 */ 257 int 258 in6_pcbconnect(struct inpcb *inp, struct mbuf *nam) 259 { 260 struct inpcbtable *table = inp->inp_table; 261 const struct in6_addr *in6a; 262 struct sockaddr_in6 *sin6; 263 struct inpcb *t; 264 int error; 265 struct sockaddr_in6 tmp; 266 267 KASSERT(ISSET(inp->inp_flags, INP_IPV6)); 268 269 if ((error = in6_nam2sin6(nam, &sin6))) 270 return (error); 271 if (sin6->sin6_port == 0) 272 return (EADDRNOTAVAIL); 273 /* reject IPv4 mapped address, we have no support for it */ 274 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) 275 return (EADDRNOTAVAIL); 276 277 /* protect *sin6 from overwrites */ 278 tmp = *sin6; 279 sin6 = &tmp; 280 281 /* KAME hack: embed scopeid */ 282 if (in6_embedscope(&sin6->sin6_addr, sin6, 283 inp->inp_outputopts6, inp->inp_moptions6) != 0) 284 return (EINVAL); 285 /* this must be cleared for ifa_ifwithaddr() */ 286 sin6->sin6_scope_id = 0; 287 288 /* Source address selection. */ 289 /* 290 * XXX: in6_selectsrc might replace the bound local address 291 * with the address specified by setsockopt(IPV6_PKTINFO). 292 * Is it the intended behavior? 293 */ 294 error = in6_pcbselsrc(&in6a, sin6, inp, inp->inp_outputopts6); 295 if (error) 296 return (error); 297 298 inp->inp_ipv6.ip6_hlim = (u_int8_t)in6_selecthlim(inp); 299 300 /* keep lookup, modification, and rehash in sync */ 301 mtx_enter(&table->inpt_mtx); 302 303 t = in6_pcblookup_lock(inp->inp_table, &sin6->sin6_addr, 304 sin6->sin6_port, 305 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) ? in6a : &inp->inp_laddr6, 306 inp->inp_lport, inp->inp_rtableid, IN_PCBLOCK_HOLD); 307 if (t != NULL) { 308 mtx_leave(&table->inpt_mtx); 309 return (EADDRINUSE); 310 } 311 312 KASSERT(IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) || inp->inp_lport); 313 314 if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) { 315 if (inp->inp_lport == 0) { 316 error = in_pcbbind_locked(inp, NULL, curproc); 317 if (error) { 318 mtx_leave(&table->inpt_mtx); 319 return (error); 320 } 321 t = in6_pcblookup_lock(inp->inp_table, &sin6->sin6_addr, 322 sin6->sin6_port, in6a, inp->inp_lport, 323 inp->inp_rtableid, IN_PCBLOCK_HOLD); 324 if (t != NULL) { 325 inp->inp_lport = 0; 326 mtx_leave(&table->inpt_mtx); 327 return (EADDRINUSE); 328 } 329 } 330 inp->inp_laddr6 = *in6a; 331 } 332 inp->inp_faddr6 = sin6->sin6_addr; 333 inp->inp_fport = sin6->sin6_port; 334 in_pcbrehash(inp); 335 336 mtx_leave(&table->inpt_mtx); 337 338 inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK; 339 if (ip6_auto_flowlabel) 340 inp->inp_flowinfo |= 341 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 342 #if NSTOEPLITZ > 0 343 inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6, 344 &inp->inp_laddr6, inp->inp_fport, inp->inp_lport); 345 #endif 346 return (0); 347 } 348 349 /* 350 * Get the local address/port, and put it in a sockaddr_in6. 351 * This services the getsockname(2) call. 352 */ 353 void 354 in6_setsockaddr(struct inpcb *inp, struct mbuf *nam) 355 { 356 struct sockaddr_in6 *sin6; 357 358 nam->m_len = sizeof(struct sockaddr_in6); 359 sin6 = mtod(nam,struct sockaddr_in6 *); 360 361 bzero ((caddr_t)sin6,sizeof(struct sockaddr_in6)); 362 sin6->sin6_family = AF_INET6; 363 sin6->sin6_len = sizeof(struct sockaddr_in6); 364 sin6->sin6_port = inp->inp_lport; 365 sin6->sin6_addr = inp->inp_laddr6; 366 /* KAME hack: recover scopeid */ 367 in6_recoverscope(sin6, &inp->inp_laddr6); 368 } 369 370 /* 371 * Get the foreign address/port, and put it in a sockaddr_in6. 372 * This services the getpeername(2) call. 373 */ 374 void 375 in6_setpeeraddr(struct inpcb *inp, struct mbuf *nam) 376 { 377 struct sockaddr_in6 *sin6; 378 379 nam->m_len = sizeof(struct sockaddr_in6); 380 sin6 = mtod(nam,struct sockaddr_in6 *); 381 382 bzero ((caddr_t)sin6,sizeof(struct sockaddr_in6)); 383 sin6->sin6_family = AF_INET6; 384 sin6->sin6_len = sizeof(struct sockaddr_in6); 385 sin6->sin6_port = inp->inp_fport; 386 sin6->sin6_addr = inp->inp_faddr6; 387 /* KAME hack: recover scopeid */ 388 in6_recoverscope(sin6, &inp->inp_faddr6); 389 } 390 391 int 392 in6_sockaddr(struct socket *so, struct mbuf *nam) 393 { 394 struct inpcb *inp; 395 396 inp = sotoinpcb(so); 397 in6_setsockaddr(inp, nam); 398 399 return (0); 400 } 401 402 int 403 in6_peeraddr(struct socket *so, struct mbuf *nam) 404 { 405 struct inpcb *inp; 406 407 inp = sotoinpcb(so); 408 in6_setpeeraddr(inp, nam); 409 410 return (0); 411 } 412 413 /* 414 * Pass some notification to all connections of a protocol 415 * associated with address dst. The local address and/or port numbers 416 * may be specified to limit the search. The "usual action" will be 417 * taken, depending on the ctlinput cmd. The caller must filter any 418 * cmds that are uninteresting (e.g., no error in the map). 419 * Call the protocol specific routine (if any) to report 420 * any errors for each matching socket. 421 * 422 * Also perform input-side security policy check 423 * once PCB to be notified has been located. 424 */ 425 void 426 in6_pcbnotify(struct inpcbtable *table, const struct sockaddr_in6 *dst, 427 uint fport_arg, const struct sockaddr_in6 *src, uint lport_arg, 428 u_int rtable, int cmd, void *cmdarg, void (*notify)(struct inpcb *, int)) 429 { 430 SIMPLEQ_HEAD(, inpcb) inpcblist; 431 struct inpcb *inp; 432 u_short fport = fport_arg, lport = lport_arg; 433 struct sockaddr_in6 sa6_src; 434 int errno; 435 u_int32_t flowinfo; 436 u_int rdomain; 437 438 if ((unsigned)cmd >= PRC_NCMDS) 439 return; 440 441 if (IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr)) 442 return; 443 if (IN6_IS_ADDR_V4MAPPED(&dst->sin6_addr)) { 444 #ifdef DIAGNOSTIC 445 printf("%s: Huh? Thought we never got " 446 "called with mapped!\n", __func__); 447 #endif 448 return; 449 } 450 451 /* 452 * note that src can be NULL when we get notify by local fragmentation. 453 */ 454 sa6_src = (src == NULL) ? sa6_any : *src; 455 flowinfo = sa6_src.sin6_flowinfo; 456 457 /* 458 * Redirects go to all references to the destination, 459 * and use in_rtchange to invalidate the route cache. 460 * Dead host indications: also use in_rtchange to invalidate 461 * the cache, and deliver the error to all the sockets. 462 * Otherwise, if we have knowledge of the local port and address, 463 * deliver only to that socket. 464 */ 465 if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { 466 fport = 0; 467 lport = 0; 468 sa6_src.sin6_addr = in6addr_any; 469 470 if (cmd != PRC_HOSTDEAD) 471 notify = in_rtchange; 472 } 473 errno = inet6ctlerrmap[cmd]; 474 if (notify == NULL) 475 return; 476 477 SIMPLEQ_INIT(&inpcblist); 478 rdomain = rtable_l2(rtable); 479 rw_enter_write(&table->inpt_notify); 480 mtx_enter(&table->inpt_mtx); 481 TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { 482 if (!ISSET(inp->inp_flags, INP_IPV6)) 483 continue; 484 485 /* 486 * Under the following condition, notify of redirects 487 * to the pcb, without making address matches against inpcb. 488 * - redirect notification is arrived. 489 * - the inpcb is unconnected. 490 * - the inpcb is caching !RTF_HOST routing entry. 491 * - the ICMPv6 notification is from the gateway cached in the 492 * inpcb. i.e. ICMPv6 notification is from nexthop gateway 493 * the inpcb used very recently. 494 * 495 * This is to improve interaction between netbsd/openbsd 496 * redirect handling code, and inpcb route cache code. 497 * without the clause, !RTF_HOST routing entry (which carries 498 * gateway used by inpcb right before the ICMPv6 redirect) 499 * will be cached forever in unconnected inpcb. 500 * 501 * There still is a question regarding to what is TRT: 502 * - On bsdi/freebsd, RTF_HOST (cloned) routing entry will be 503 * generated on packet output. inpcb will always cache 504 * RTF_HOST routing entry so there's no need for the clause 505 * (ICMPv6 redirect will update RTF_HOST routing entry, 506 * and inpcb is caching it already). 507 * However, bsdi/freebsd are vulnerable to local DoS attacks 508 * due to the cloned routing entries. 509 * - Specwise, "destination cache" is mentioned in RFC2461. 510 * Jinmei says that it implies bsdi/freebsd behavior, itojun 511 * is not really convinced. 512 * - Having hiwat/lowat on # of cloned host route (redirect/ 513 * pmtud) may be a good idea. netbsd/openbsd has it. see 514 * icmp6_mtudisc_update(). 515 */ 516 if ((PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) && 517 IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) && 518 inp->inp_route.ro_rt && 519 !(inp->inp_route.ro_rt->rt_flags & RTF_HOST) && 520 IN6_ARE_ADDR_EQUAL(&inp->inp_route.ro_dstsin6.sin6_addr, 521 &dst->sin6_addr)) { 522 goto do_notify; 523 } 524 525 /* 526 * Detect if we should notify the error. If no source and 527 * destination ports are specified, but non-zero flowinfo and 528 * local address match, notify the error. This is the case 529 * when the error is delivered with an encrypted buffer 530 * by ESP. Otherwise, just compare addresses and ports 531 * as usual. 532 */ 533 if (lport == 0 && fport == 0 && flowinfo && 534 flowinfo == (inp->inp_flowinfo & IPV6_FLOWLABEL_MASK) && 535 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, &sa6_src.sin6_addr)) 536 goto do_notify; 537 else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, 538 &dst->sin6_addr) || 539 rtable_l2(inp->inp_rtableid) != rdomain || 540 (lport && inp->inp_lport != lport) || 541 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && 542 !IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, 543 &sa6_src.sin6_addr)) || 544 (fport && inp->inp_fport != fport)) { 545 continue; 546 } 547 do_notify: 548 in_pcbref(inp); 549 SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify); 550 } 551 mtx_leave(&table->inpt_mtx); 552 553 while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) { 554 SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify); 555 (*notify)(inp, errno); 556 in_pcbunref(inp); 557 } 558 rw_exit_write(&table->inpt_notify); 559 } 560 561 struct rtentry * 562 in6_pcbrtentry(struct inpcb *inp) 563 { 564 struct route *ro = &inp->inp_route; 565 566 if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) 567 return (NULL); 568 if (route6_cache(ro, &inp->inp_faddr6, inp->inp_rtableid)) { 569 ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, 570 &inp->inp_laddr6.s6_addr32[0], ro->ro_tableid); 571 } 572 return (ro->ro_rt); 573 } 574 575 struct inpcb * 576 in6_pcbhash_lookup(struct inpcbtable *table, uint64_t hash, u_int rdomain, 577 const struct in6_addr *faddr, u_short fport, 578 const struct in6_addr *laddr, u_short lport) 579 { 580 struct inpcbhead *head; 581 struct inpcb *inp; 582 583 NET_ASSERT_LOCKED(); 584 MUTEX_ASSERT_LOCKED(&table->inpt_mtx); 585 586 head = &table->inpt_hashtbl[hash & table->inpt_mask]; 587 LIST_FOREACH(inp, head, inp_hash) { 588 if (!ISSET(inp->inp_flags, INP_IPV6)) 589 continue; 590 if (inp->inp_fport == fport && inp->inp_lport == lport && 591 IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) && 592 IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr) && 593 rtable_l2(inp->inp_rtableid) == rdomain) { 594 break; 595 } 596 } 597 if (inp != NULL) { 598 /* 599 * Move this PCB to the head of hash chain so that 600 * repeated accesses are quicker. This is analogous to 601 * the historic single-entry PCB cache. 602 */ 603 if (inp != LIST_FIRST(head)) { 604 LIST_REMOVE(inp, inp_hash); 605 LIST_INSERT_HEAD(head, inp, inp_hash); 606 } 607 } 608 return (inp); 609 } 610 611 struct inpcb * 612 in6_pcblookup_lock(struct inpcbtable *table, const struct in6_addr *faddr, 613 u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable, 614 int lock) 615 { 616 struct inpcb *inp; 617 uint64_t hash; 618 u_int rdomain; 619 620 rdomain = rtable_l2(rtable); 621 hash = in6_pcbhash(table, rdomain, faddr, fport, laddr, lport); 622 623 if (lock == IN_PCBLOCK_GRAB) { 624 mtx_enter(&table->inpt_mtx); 625 } else { 626 KASSERT(lock == IN_PCBLOCK_HOLD); 627 MUTEX_ASSERT_LOCKED(&table->inpt_mtx); 628 } 629 inp = in6_pcbhash_lookup(table, hash, rdomain, 630 faddr, fport, laddr, lport); 631 if (lock == IN_PCBLOCK_GRAB) { 632 in_pcbref(inp); 633 mtx_leave(&table->inpt_mtx); 634 } 635 636 #ifdef DIAGNOSTIC 637 if (inp == NULL && in_pcbnotifymiss) { 638 printf("%s: faddr= fport=%d laddr= lport=%d rdom=%u\n", 639 __func__, ntohs(fport), ntohs(lport), rdomain); 640 } 641 #endif 642 return (inp); 643 } 644 645 struct inpcb * 646 in6_pcblookup(struct inpcbtable *table, const struct in6_addr *faddr, 647 u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable) 648 { 649 return in6_pcblookup_lock(table, faddr, fport, laddr, lport, rtable, 650 IN_PCBLOCK_GRAB); 651 } 652 653 struct inpcb * 654 in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr, 655 u_int lport, struct mbuf *m, u_int rtable) 656 { 657 const struct in6_addr *key1, *key2; 658 struct inpcb *inp; 659 uint64_t hash; 660 u_int rdomain; 661 662 key1 = laddr; 663 key2 = &zeroin6_addr; 664 #if NPF > 0 665 if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 666 struct pf_divert *divert; 667 668 divert = pf_find_divert(m); 669 KASSERT(divert != NULL); 670 switch (divert->type) { 671 case PF_DIVERT_TO: 672 key1 = key2 = &divert->addr.v6; 673 lport = divert->port; 674 break; 675 case PF_DIVERT_REPLY: 676 return (NULL); 677 default: 678 panic("%s: unknown divert type %d, mbuf %p, divert %p", 679 __func__, divert->type, m, divert); 680 } 681 } else if (m && m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST) { 682 /* 683 * Redirected connections should not be treated the same 684 * as connections directed to ::1 since localhost 685 * can only be accessed from the host itself. 686 */ 687 key1 = &zeroin6_addr; 688 key2 = laddr; 689 } 690 #endif 691 692 rdomain = rtable_l2(rtable); 693 hash = in6_pcbhash(table, rdomain, &zeroin6_addr, 0, key1, lport); 694 695 mtx_enter(&table->inpt_mtx); 696 inp = in6_pcbhash_lookup(table, hash, rdomain, 697 &zeroin6_addr, 0, key1, lport); 698 if (inp == NULL && ! IN6_ARE_ADDR_EQUAL(key1, key2)) { 699 hash = in6_pcbhash(table, rdomain, 700 &zeroin6_addr, 0, key2, lport); 701 inp = in6_pcbhash_lookup(table, hash, rdomain, 702 &zeroin6_addr, 0, key2, lport); 703 } 704 in_pcbref(inp); 705 mtx_leave(&table->inpt_mtx); 706 707 #ifdef DIAGNOSTIC 708 if (inp == NULL && in_pcbnotifymiss) { 709 printf("%s: laddr= lport=%d rdom=%u\n", 710 __func__, ntohs(lport), rdomain); 711 } 712 #endif 713 return (inp); 714 } 715