xref: /openbsd/sys/netinet6/in6_pcb.c (revision 9e6efb0a)
1 /*	$OpenBSD: in6_pcb.c,v 1.144 2024/04/12 16:07:09 bluhm Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
34  *
35  * NRL grants permission for redistribution and use in source and binary
36  * forms, with or without modification, of the software and documentation
37  * created at NRL provided that the following conditions are met:
38  *
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. All advertising materials mentioning features or use of this software
45  *    must display the following acknowledgements:
46  *	This product includes software developed by the University of
47  *	California, Berkeley and its contributors.
48  *	This product includes software developed at the Information
49  *	Technology Division, US Naval Research Laboratory.
50  * 4. Neither the name of the NRL nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
55  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
56  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
57  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
58  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
59  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
60  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
61  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
62  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
63  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
64  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65  *
66  * The views and conclusions contained in the software and documentation
67  * are those of the authors and should not be interpreted as representing
68  * official policies, either expressed or implied, of the US Naval
69  * Research Laboratory (NRL).
70  */
71 
72 /*
73  * Copyright (c) 1982, 1986, 1990, 1993, 1995
74  *	Regents of the University of California.  All rights reserved.
75  *
76  * Redistribution and use in source and binary forms, with or without
77  * modification, are permitted provided that the following conditions
78  * are met:
79  * 1. Redistributions of source code must retain the above copyright
80  *    notice, this list of conditions and the following disclaimer.
81  * 2. Redistributions in binary form must reproduce the above copyright
82  *    notice, this list of conditions and the following disclaimer in the
83  *    documentation and/or other materials provided with the distribution.
84  * 3. Neither the name of the University nor the names of its contributors
85  *    may be used to endorse or promote products derived from this software
86  *    without specific prior written permission.
87  *
88  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
89  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
90  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
91  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
92  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
93  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
94  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
95  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
96  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
97  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
98  * SUCH DAMAGE.
99  *
100  */
101 
102 #include "pf.h"
103 #include "stoeplitz.h"
104 
105 #include <sys/param.h>
106 #include <sys/systm.h>
107 #include <sys/mbuf.h>
108 #include <sys/protosw.h>
109 #include <sys/socket.h>
110 #include <sys/socketvar.h>
111 
112 #include <net/if.h>
113 #include <net/if_var.h>
114 #include <net/pfvar.h>
115 
116 #include <netinet/in.h>
117 #include <netinet6/in6_var.h>
118 #include <netinet/ip.h>
119 #include <netinet/ip_var.h>
120 #include <netinet6/ip6_var.h>
121 #include <netinet/in_pcb.h>
122 
123 #if NSTOEPLITZ > 0
124 #include <net/toeplitz.h>
125 #endif
126 
127 const struct in6_addr zeroin6_addr;
128 
129 struct inpcb *in6_pcbhash_lookup(struct inpcbtable *, uint64_t, u_int,
130     const struct in6_addr *, u_short, const struct in6_addr *, u_short);
131 
132 struct inpcb * in6_pcblookup_lock(struct inpcbtable *, const struct in6_addr *,
133     u_int, const struct in6_addr *, u_int, u_int, int);
134 
135 uint64_t
136 in6_pcbhash(struct inpcbtable *table, u_int rdomain,
137     const struct in6_addr *faddr, u_short fport,
138     const struct in6_addr *laddr, u_short lport)
139 {
140 	SIPHASH_CTX ctx;
141 	u_int32_t nrdom = htonl(rdomain);
142 
143 	SipHash24_Init(&ctx, &table->inpt_key);
144 	SipHash24_Update(&ctx, &nrdom, sizeof(nrdom));
145 	SipHash24_Update(&ctx, faddr, sizeof(*faddr));
146 	SipHash24_Update(&ctx, &fport, sizeof(fport));
147 	SipHash24_Update(&ctx, laddr, sizeof(*laddr));
148 	SipHash24_Update(&ctx, &lport, sizeof(lport));
149 	return SipHash24_End(&ctx);
150 }
151 
152 int
153 in6_pcbaddrisavail_lock(const struct inpcb *inp, struct sockaddr_in6 *sin6,
154     int wild, struct proc *p, int lock)
155 {
156 	struct socket *so = inp->inp_socket;
157 	struct inpcbtable *table = inp->inp_table;
158 	u_short lport = sin6->sin6_port;
159 	int reuseport = (so->so_options & SO_REUSEPORT);
160 
161 	wild |= INPLOOKUP_IPV6;
162 	/* KAME hack: embed scopeid */
163 	if (in6_embedscope(&sin6->sin6_addr, sin6,
164 	    inp->inp_outputopts6, inp->inp_moptions6) != 0)
165 		return (EINVAL);
166 	/* this must be cleared for ifa_ifwithaddr() */
167 	sin6->sin6_scope_id = 0;
168 	/* reject IPv4 mapped address, we have no support for it */
169 	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
170 		return (EADDRNOTAVAIL);
171 
172 	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
173 		/*
174 		 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
175 		 * allow complete duplication of binding if
176 		 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
177 		 * and a multicast address is bound on both
178 		 * new and duplicated sockets.
179 		 */
180 		if (so->so_options & (SO_REUSEADDR|SO_REUSEPORT))
181 			reuseport = SO_REUSEADDR | SO_REUSEPORT;
182 	} else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
183 		struct ifaddr *ifa = NULL;
184 
185 		sin6->sin6_port = 0;  /*
186 				       * Yechhhh, because of upcoming
187 				       * call to ifa_ifwithaddr(), which
188 				       * does bcmp's over the PORTS as
189 				       * well.  (What about flow?)
190 				       */
191 		sin6->sin6_flowinfo = 0;
192 		if (!(so->so_options & SO_BINDANY) &&
193 		    (ifa = ifa_ifwithaddr(sin6tosa(sin6),
194 		    inp->inp_rtableid)) == NULL)
195 			return (EADDRNOTAVAIL);
196 		sin6->sin6_port = lport;
197 
198 		/*
199 		 * bind to an anycast address might accidentally
200 		 * cause sending a packet with an anycast source
201 		 * address, so we forbid it.
202 		 *
203 		 * We should allow to bind to a deprecated address,
204 		 * since the application dare to use it.
205 		 * But, can we assume that they are careful enough
206 		 * to check if the address is deprecated or not?
207 		 * Maybe, as a safeguard, we should have a setsockopt
208 		 * flag to control the bind(2) behavior against
209 		 * deprecated addresses (default: forbid bind(2)).
210 		 */
211 		if (ifa && ifatoia6(ifa)->ia6_flags & (IN6_IFF_ANYCAST|
212 		    IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED|IN6_IFF_DETACHED))
213 			return (EADDRNOTAVAIL);
214 	}
215 	if (lport) {
216 		struct inpcb *t;
217 		int error = 0;
218 
219 		if (so->so_euid && !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
220 			t = in_pcblookup_local_lock(table, &sin6->sin6_addr,
221 			    lport, INPLOOKUP_WILDCARD | INPLOOKUP_IPV6,
222 			    inp->inp_rtableid, lock);
223 			if (t && (so->so_euid != t->inp_socket->so_euid))
224 				error = EADDRINUSE;
225 			if (lock == IN_PCBLOCK_GRAB)
226 				in_pcbunref(t);
227 			if (error)
228 				return (error);
229 		}
230 		t = in_pcblookup_local_lock(table, &sin6->sin6_addr, lport,
231 		    wild, inp->inp_rtableid, lock);
232 		if (t && (reuseport & t->inp_socket->so_options) == 0)
233 			error = EADDRINUSE;
234 		if (lock == IN_PCBLOCK_GRAB)
235 			in_pcbunref(t);
236 		if (error)
237 			return (error);
238 	}
239 	return (0);
240 }
241 
242 int
243 in6_pcbaddrisavail(const struct inpcb *inp, struct sockaddr_in6 *sin6,
244     int wild, struct proc *p)
245 {
246 	return in6_pcbaddrisavail_lock(inp, sin6, wild, p, IN_PCBLOCK_GRAB);
247 }
248 
249 /*
250  * Connect from a socket to a specified address.
251  * Both address and port must be specified in argument sin6.
252  * Eventually, flow labels will have to be dealt with here, as well.
253  *
254  * If don't have a local address for this socket yet,
255  * then pick one.
256  */
257 int
258 in6_pcbconnect(struct inpcb *inp, struct mbuf *nam)
259 {
260 	struct inpcbtable *table = inp->inp_table;
261 	const struct in6_addr *in6a;
262 	struct sockaddr_in6 *sin6;
263 	struct inpcb *t;
264 	int error;
265 	struct sockaddr_in6 tmp;
266 
267 	KASSERT(ISSET(inp->inp_flags, INP_IPV6));
268 
269 	if ((error = in6_nam2sin6(nam, &sin6)))
270 		return (error);
271 	if (sin6->sin6_port == 0)
272 		return (EADDRNOTAVAIL);
273 	/* reject IPv4 mapped address, we have no support for it */
274 	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
275 		return (EADDRNOTAVAIL);
276 
277 	/* protect *sin6 from overwrites */
278 	tmp = *sin6;
279 	sin6 = &tmp;
280 
281 	/* KAME hack: embed scopeid */
282 	if (in6_embedscope(&sin6->sin6_addr, sin6,
283 	    inp->inp_outputopts6, inp->inp_moptions6) != 0)
284 		return (EINVAL);
285 	/* this must be cleared for ifa_ifwithaddr() */
286 	sin6->sin6_scope_id = 0;
287 
288 	/* Source address selection. */
289 	/*
290 	 * XXX: in6_selectsrc might replace the bound local address
291 	 * with the address specified by setsockopt(IPV6_PKTINFO).
292 	 * Is it the intended behavior?
293 	 */
294 	error = in6_pcbselsrc(&in6a, sin6, inp, inp->inp_outputopts6);
295 	if (error)
296 		return (error);
297 
298 	inp->inp_ipv6.ip6_hlim = (u_int8_t)in6_selecthlim(inp);
299 
300 	/* keep lookup, modification, and rehash in sync */
301 	mtx_enter(&table->inpt_mtx);
302 
303 	t = in6_pcblookup_lock(inp->inp_table, &sin6->sin6_addr,
304 	    sin6->sin6_port,
305 	    IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) ? in6a : &inp->inp_laddr6,
306 	    inp->inp_lport, inp->inp_rtableid, IN_PCBLOCK_HOLD);
307 	if (t != NULL) {
308 		mtx_leave(&table->inpt_mtx);
309 		return (EADDRINUSE);
310 	}
311 
312 	KASSERT(IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) || inp->inp_lport);
313 
314 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
315 		if (inp->inp_lport == 0) {
316 			error = in_pcbbind_locked(inp, NULL, in6a, curproc);
317 			if (error) {
318 				mtx_leave(&table->inpt_mtx);
319 				return (error);
320 			}
321 			t = in6_pcblookup_lock(inp->inp_table, &sin6->sin6_addr,
322 			    sin6->sin6_port, in6a, inp->inp_lport,
323 			    inp->inp_rtableid, IN_PCBLOCK_HOLD);
324 			if (t != NULL) {
325 				inp->inp_lport = 0;
326 				mtx_leave(&table->inpt_mtx);
327 				return (EADDRINUSE);
328 			}
329 		}
330 		inp->inp_laddr6 = *in6a;
331 	}
332 	inp->inp_faddr6 = sin6->sin6_addr;
333 	inp->inp_fport = sin6->sin6_port;
334 	in_pcbrehash(inp);
335 
336 	mtx_leave(&table->inpt_mtx);
337 
338 	inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK;
339 	if (ip6_auto_flowlabel)
340 		inp->inp_flowinfo |=
341 		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
342 #if NSTOEPLITZ > 0
343 	inp->inp_flowid = stoeplitz_ip6port(&inp->inp_faddr6,
344 	    &inp->inp_laddr6, inp->inp_fport, inp->inp_lport);
345 #endif
346 	return (0);
347 }
348 
349 /*
350  * Get the local address/port, and put it in a sockaddr_in6.
351  * This services the getsockname(2) call.
352  */
353 void
354 in6_setsockaddr(struct inpcb *inp, struct mbuf *nam)
355 {
356 	struct sockaddr_in6 *sin6;
357 
358 	nam->m_len = sizeof(struct sockaddr_in6);
359 	sin6 = mtod(nam,struct sockaddr_in6 *);
360 
361 	bzero ((caddr_t)sin6,sizeof(struct sockaddr_in6));
362 	sin6->sin6_family = AF_INET6;
363 	sin6->sin6_len = sizeof(struct sockaddr_in6);
364 	sin6->sin6_port = inp->inp_lport;
365 	sin6->sin6_addr = inp->inp_laddr6;
366 	/* KAME hack: recover scopeid */
367 	in6_recoverscope(sin6, &inp->inp_laddr6);
368 }
369 
370 /*
371  * Get the foreign address/port, and put it in a sockaddr_in6.
372  * This services the getpeername(2) call.
373  */
374 void
375 in6_setpeeraddr(struct inpcb *inp, struct mbuf *nam)
376 {
377 	struct sockaddr_in6 *sin6;
378 
379 	nam->m_len = sizeof(struct sockaddr_in6);
380 	sin6 = mtod(nam,struct sockaddr_in6 *);
381 
382 	bzero ((caddr_t)sin6,sizeof(struct sockaddr_in6));
383 	sin6->sin6_family = AF_INET6;
384 	sin6->sin6_len = sizeof(struct sockaddr_in6);
385 	sin6->sin6_port = inp->inp_fport;
386 	sin6->sin6_addr = inp->inp_faddr6;
387 	/* KAME hack: recover scopeid */
388 	in6_recoverscope(sin6, &inp->inp_faddr6);
389 }
390 
/*
 * Socket level wrapper around in6_setsockaddr(): write the local
 * address of the socket's PCB into nam.  Always returns 0.
 */
int
in6_sockaddr(struct socket *so, struct mbuf *nam)
{
	in6_setsockaddr(sotoinpcb(so), nam);
	return (0);
}
401 
/*
 * Socket level wrapper around in6_setpeeraddr(): write the foreign
 * address of the socket's PCB into nam.  Always returns 0.
 */
int
in6_peeraddr(struct socket *so, struct mbuf *nam)
{
	in6_setpeeraddr(sotoinpcb(so), nam);
	return (0);
}
412 
413 /*
414  * Pass some notification to all connections of a protocol
415  * associated with address dst.  The local address and/or port numbers
416  * may be specified to limit the search.  The "usual action" will be
417  * taken, depending on the ctlinput cmd.  The caller must filter any
418  * cmds that are uninteresting (e.g., no error in the map).
419  * Call the protocol specific routine (if any) to report
420  * any errors for each matching socket.
421  *
422  * Also perform input-side security policy check
423  *    once PCB to be notified has been located.
424  */
425 void
426 in6_pcbnotify(struct inpcbtable *table, const struct sockaddr_in6 *dst,
427     uint fport_arg, const struct sockaddr_in6 *src, uint lport_arg,
428     u_int rtable, int cmd, void *cmdarg, void (*notify)(struct inpcb *, int))
429 {
430 	SIMPLEQ_HEAD(, inpcb) inpcblist;
431 	struct inpcb *inp;
432 	u_short fport = fport_arg, lport = lport_arg;
433 	struct sockaddr_in6 sa6_src;
434 	int errno;
435 	u_int32_t flowinfo;
436 	u_int rdomain;
437 
438 	if ((unsigned)cmd >= PRC_NCMDS)
439 		return;
440 
441 	if (IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr))
442 		return;
443 	if (IN6_IS_ADDR_V4MAPPED(&dst->sin6_addr)) {
444 #ifdef DIAGNOSTIC
445 		printf("%s: Huh?  Thought we never got "
446 		       "called with mapped!\n", __func__);
447 #endif
448 		return;
449 	}
450 
451 	/*
452 	 * note that src can be NULL when we get notify by local fragmentation.
453 	 */
454 	sa6_src = (src == NULL) ? sa6_any : *src;
455 	flowinfo = sa6_src.sin6_flowinfo;
456 
457 	/*
458 	 * Redirects go to all references to the destination,
459 	 * and use in_rtchange to invalidate the route cache.
460 	 * Dead host indications: also use in_rtchange to invalidate
461 	 * the cache, and deliver the error to all the sockets.
462 	 * Otherwise, if we have knowledge of the local port and address,
463 	 * deliver only to that socket.
464 	 */
465 	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) {
466 		fport = 0;
467 		lport = 0;
468 		sa6_src.sin6_addr = in6addr_any;
469 
470 		if (cmd != PRC_HOSTDEAD)
471 			notify = in_rtchange;
472 	}
473 	errno = inet6ctlerrmap[cmd];
474 	if (notify == NULL)
475 		return;
476 
477 	SIMPLEQ_INIT(&inpcblist);
478 	rdomain = rtable_l2(rtable);
479 	rw_enter_write(&table->inpt_notify);
480 	mtx_enter(&table->inpt_mtx);
481 	TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
482 		KASSERT(ISSET(inp->inp_flags, INP_IPV6));
483 
484 		/*
485 		 * Under the following condition, notify of redirects
486 		 * to the pcb, without making address matches against inpcb.
487 		 * - redirect notification is arrived.
488 		 * - the inpcb is unconnected.
489 		 * - the inpcb is caching !RTF_HOST routing entry.
490 		 * - the ICMPv6 notification is from the gateway cached in the
491 		 *   inpcb.  i.e. ICMPv6 notification is from nexthop gateway
492 		 *   the inpcb used very recently.
493 		 *
494 		 * This is to improve interaction between netbsd/openbsd
495 		 * redirect handling code, and inpcb route cache code.
496 		 * without the clause, !RTF_HOST routing entry (which carries
497 		 * gateway used by inpcb right before the ICMPv6 redirect)
498 		 * will be cached forever in unconnected inpcb.
499 		 *
500 		 * There still is a question regarding to what is TRT:
501 		 * - On bsdi/freebsd, RTF_HOST (cloned) routing entry will be
502 		 *   generated on packet output.  inpcb will always cache
503 		 *   RTF_HOST routing entry so there's no need for the clause
504 		 *   (ICMPv6 redirect will update RTF_HOST routing entry,
505 		 *   and inpcb is caching it already).
506 		 *   However, bsdi/freebsd are vulnerable to local DoS attacks
507 		 *   due to the cloned routing entries.
508 		 * - Specwise, "destination cache" is mentioned in RFC2461.
509 		 *   Jinmei says that it implies bsdi/freebsd behavior, itojun
510 		 *   is not really convinced.
511 		 * - Having hiwat/lowat on # of cloned host route (redirect/
512 		 *   pmtud) may be a good idea.  netbsd/openbsd has it.  see
513 		 *   icmp6_mtudisc_update().
514 		 */
515 		if ((PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) &&
516 		    IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) &&
517 		    inp->inp_route.ro_rt &&
518 		    !(inp->inp_route.ro_rt->rt_flags & RTF_HOST) &&
519 		    IN6_ARE_ADDR_EQUAL(&inp->inp_route.ro_dstsin6.sin6_addr,
520 		    &dst->sin6_addr)) {
521 			goto do_notify;
522 		}
523 
524 		/*
525 		 * Detect if we should notify the error. If no source and
526 		 * destination ports are specified, but non-zero flowinfo and
527 		 * local address match, notify the error. This is the case
528 		 * when the error is delivered with an encrypted buffer
529 		 * by ESP. Otherwise, just compare addresses and ports
530 		 * as usual.
531 		 */
532 		if (lport == 0 && fport == 0 && flowinfo &&
533 		    flowinfo == (inp->inp_flowinfo & IPV6_FLOWLABEL_MASK) &&
534 		    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, &sa6_src.sin6_addr))
535 			goto do_notify;
536 		else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6,
537 					     &dst->sin6_addr) ||
538 			 rtable_l2(inp->inp_rtableid) != rdomain ||
539 			 (lport && inp->inp_lport != lport) ||
540 			 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
541 			  !IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6,
542 					      &sa6_src.sin6_addr)) ||
543 			 (fport && inp->inp_fport != fport)) {
544 			continue;
545 		}
546 	  do_notify:
547 		in_pcbref(inp);
548 		SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
549 	}
550 	mtx_leave(&table->inpt_mtx);
551 
552 	while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
553 		SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
554 		(*notify)(inp, errno);
555 		in_pcbunref(inp);
556 	}
557 	rw_exit_write(&table->inpt_notify);
558 }
559 
560 struct rtentry *
561 in6_pcbrtentry(struct inpcb *inp)
562 {
563 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
564 		return (NULL);
565 	return (route6_mpath(&inp->inp_route, &inp->inp_faddr6,
566 	    &inp->inp_laddr6, inp->inp_rtableid));
567 }
568 
569 struct inpcb *
570 in6_pcbhash_lookup(struct inpcbtable *table, uint64_t hash, u_int rdomain,
571     const struct in6_addr *faddr, u_short fport,
572     const struct in6_addr *laddr, u_short lport)
573 {
574 	struct inpcbhead *head;
575 	struct inpcb *inp;
576 
577 	NET_ASSERT_LOCKED();
578 	MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
579 
580 	head = &table->inpt_hashtbl[hash & table->inpt_mask];
581 	LIST_FOREACH(inp, head, inp_hash) {
582 		KASSERT(ISSET(inp->inp_flags, INP_IPV6));
583 
584 		if (inp->inp_fport == fport && inp->inp_lport == lport &&
585 		    IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, faddr) &&
586 		    IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, laddr) &&
587 		    rtable_l2(inp->inp_rtableid) == rdomain) {
588 			break;
589 		}
590 	}
591 	if (inp != NULL) {
592 		/*
593 		 * Move this PCB to the head of hash chain so that
594 		 * repeated accesses are quicker.  This is analogous to
595 		 * the historic single-entry PCB cache.
596 		 */
597 		if (inp != LIST_FIRST(head)) {
598 			LIST_REMOVE(inp, inp_hash);
599 			LIST_INSERT_HEAD(head, inp, inp_hash);
600 		}
601 	}
602 	return (inp);
603 }
604 
605 struct inpcb *
606 in6_pcblookup_lock(struct inpcbtable *table, const struct in6_addr *faddr,
607     u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable,
608     int lock)
609 {
610 	struct inpcb *inp;
611 	uint64_t hash;
612 	u_int rdomain;
613 
614 	rdomain = rtable_l2(rtable);
615 	hash = in6_pcbhash(table, rdomain, faddr, fport, laddr, lport);
616 
617 	if (lock == IN_PCBLOCK_GRAB) {
618 		mtx_enter(&table->inpt_mtx);
619 	} else {
620 		KASSERT(lock == IN_PCBLOCK_HOLD);
621 		MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
622 	}
623 	inp = in6_pcbhash_lookup(table, hash, rdomain,
624 	    faddr, fport, laddr, lport);
625 	if (lock == IN_PCBLOCK_GRAB) {
626 		in_pcbref(inp);
627 		mtx_leave(&table->inpt_mtx);
628 	}
629 
630 #ifdef DIAGNOSTIC
631 	if (inp == NULL && in_pcbnotifymiss) {
632 		printf("%s: faddr= fport=%d laddr= lport=%d rdom=%u\n",
633 		    __func__, ntohs(fport), ntohs(lport), rdomain);
634 	}
635 #endif
636 	return (inp);
637 }
638 
639 struct inpcb *
640 in6_pcblookup(struct inpcbtable *table, const struct in6_addr *faddr,
641     u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable)
642 {
643 	return in6_pcblookup_lock(table, faddr, fport, laddr, lport, rtable,
644 	    IN_PCBLOCK_GRAB);
645 }
646 
647 struct inpcb *
648 in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr,
649     u_int lport, struct mbuf *m, u_int rtable)
650 {
651 	const struct in6_addr *key1, *key2;
652 	struct inpcb *inp;
653 	uint64_t hash;
654 	u_int rdomain;
655 
656 	key1 = laddr;
657 	key2 = &zeroin6_addr;
658 #if NPF > 0
659 	if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
660 		struct pf_divert *divert;
661 
662 		divert = pf_find_divert(m);
663 		KASSERT(divert != NULL);
664 		switch (divert->type) {
665 		case PF_DIVERT_TO:
666 			key1 = key2 = &divert->addr.v6;
667 			lport = divert->port;
668 			break;
669 		case PF_DIVERT_REPLY:
670 			return (NULL);
671 		default:
672 			panic("%s: unknown divert type %d, mbuf %p, divert %p",
673 			    __func__, divert->type, m, divert);
674 		}
675 	} else if (m && m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST) {
676 		/*
677 		 * Redirected connections should not be treated the same
678 		 * as connections directed to ::1 since localhost
679 		 * can only be accessed from the host itself.
680 		 */
681 		key1 = &zeroin6_addr;
682 		key2 = laddr;
683 	}
684 #endif
685 
686 	rdomain = rtable_l2(rtable);
687 	hash = in6_pcbhash(table, rdomain, &zeroin6_addr, 0, key1, lport);
688 
689 	mtx_enter(&table->inpt_mtx);
690 	inp = in6_pcbhash_lookup(table, hash, rdomain,
691 	    &zeroin6_addr, 0, key1, lport);
692 	if (inp == NULL && ! IN6_ARE_ADDR_EQUAL(key1, key2)) {
693 		hash = in6_pcbhash(table, rdomain,
694 		    &zeroin6_addr, 0, key2, lport);
695 		inp = in6_pcbhash_lookup(table, hash, rdomain,
696 		    &zeroin6_addr, 0, key2, lport);
697 	}
698 	in_pcbref(inp);
699 	mtx_leave(&table->inpt_mtx);
700 
701 #ifdef DIAGNOSTIC
702 	if (inp == NULL && in_pcbnotifymiss) {
703 		printf("%s: laddr= lport=%d rdom=%u\n",
704 		    __func__, ntohs(lport), rdomain);
705 	}
706 #endif
707 	return (inp);
708 }
709