xref: /openbsd/sys/netinet6/icmp6.c (revision 9b7c3dbb)
1 /*	$OpenBSD: icmp6.c,v 1.190 2016/08/24 09:38:29 mpi Exp $	*/
2 /*	$KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $	*/
3 
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1982, 1986, 1988, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
62  */
63 
64 #include "carp.h"
65 #include "pf.h"
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/malloc.h>
70 #include <sys/mbuf.h>
71 #include <sys/sysctl.h>
72 #include <sys/protosw.h>
73 #include <sys/socket.h>
74 #include <sys/socketvar.h>
75 #include <sys/time.h>
76 #include <sys/kernel.h>
77 #include <sys/syslog.h>
78 #include <sys/domain.h>
79 
80 #include <net/if.h>
81 #include <net/if_var.h>
82 #include <net/route.h>
83 #include <net/if_dl.h>
84 #include <net/if_types.h>
85 
86 #include <netinet/in.h>
87 #include <netinet/ip.h>
88 #include <netinet6/in6_var.h>
89 #include <netinet/ip6.h>
90 #include <netinet6/ip6_var.h>
91 #include <netinet/icmp6.h>
92 #include <netinet6/mld6_var.h>
93 #include <netinet/in_pcb.h>
94 #include <netinet6/nd6.h>
95 #include <netinet6/ip6protosw.h>
96 
97 #if NCARP > 0
98 #include <netinet/ip_carp.h>
99 #endif
100 
101 #if NPF > 0
102 #include <net/pfvar.h>
103 #endif
104 
105 struct icmp6stat icmp6stat;
106 
107 extern struct inpcbtable rawin6pcbtable;
108 extern int icmp6errppslim;
109 static int icmp6errpps_count = 0;
110 static struct timeval icmp6errppslim_last;
111 
112 /*
113  * List of callbacks to notify when Path MTU changes are made.
114  */
115 struct icmp6_mtudisc_callback {
116 	LIST_ENTRY(icmp6_mtudisc_callback) mc_list;
117 	void (*mc_func)(struct sockaddr_in6 *, u_int);
118 };
119 
120 LIST_HEAD(, icmp6_mtudisc_callback) icmp6_mtudisc_callbacks =
121     LIST_HEAD_INITIALIZER(icmp6_mtudisc_callbacks);
122 
123 struct rttimer_queue *icmp6_mtudisc_timeout_q = NULL;
124 
125 /* XXX do these values make any sense? */
126 static int icmp6_mtudisc_hiwat = 1280;
127 static int icmp6_mtudisc_lowat = 256;
128 
129 /*
130  * keep track of # of redirect routes.
131  */
132 static struct rttimer_queue *icmp6_redirect_timeout_q = NULL;
133 
134 /* XXX experimental, turned off */
135 static int icmp6_redirect_lowat = -1;
136 
137 void	icmp6_errcount(struct icmp6errstat *, int, int);
138 int	icmp6_rip6_input(struct mbuf **, int);
139 int	icmp6_ratelimit(const struct in6_addr *, const int, const int);
140 const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *,
141 	    struct in6_addr *);
142 int	icmp6_notify_error(struct mbuf *, int, int, int);
143 struct rtentry *icmp6_mtudisc_clone(struct sockaddr *, u_int);
144 void	icmp6_mtudisc_timeout(struct rtentry *, struct rttimer *);
145 void	icmp6_redirect_timeout(struct rtentry *, struct rttimer *);
146 
/*
 * Initialise ICMPv6 state: run mld6_init() and create the two route timer
 * queues kept in icmp6_mtudisc_timeout_q and icmp6_redirect_timeout_q.
 */
void
icmp6_init(void)
{
	mld6_init();
	/* Queue timeouts are taken from ip6_mtudisc_timeout/icmp6_redirtimeout. */
	icmp6_mtudisc_timeout_q = rt_timer_queue_create(ip6_mtudisc_timeout);
	icmp6_redirect_timeout_q = rt_timer_queue_create(icmp6_redirtimeout);
}
154 
155 void
156 icmp6_errcount(struct icmp6errstat *stat, int type, int code)
157 {
158 	switch (type) {
159 	case ICMP6_DST_UNREACH:
160 		switch (code) {
161 		case ICMP6_DST_UNREACH_NOROUTE:
162 			stat->icp6errs_dst_unreach_noroute++;
163 			return;
164 		case ICMP6_DST_UNREACH_ADMIN:
165 			stat->icp6errs_dst_unreach_admin++;
166 			return;
167 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
168 			stat->icp6errs_dst_unreach_beyondscope++;
169 			return;
170 		case ICMP6_DST_UNREACH_ADDR:
171 			stat->icp6errs_dst_unreach_addr++;
172 			return;
173 		case ICMP6_DST_UNREACH_NOPORT:
174 			stat->icp6errs_dst_unreach_noport++;
175 			return;
176 		}
177 		break;
178 	case ICMP6_PACKET_TOO_BIG:
179 		stat->icp6errs_packet_too_big++;
180 		return;
181 	case ICMP6_TIME_EXCEEDED:
182 		switch (code) {
183 		case ICMP6_TIME_EXCEED_TRANSIT:
184 			stat->icp6errs_time_exceed_transit++;
185 			return;
186 		case ICMP6_TIME_EXCEED_REASSEMBLY:
187 			stat->icp6errs_time_exceed_reassembly++;
188 			return;
189 		}
190 		break;
191 	case ICMP6_PARAM_PROB:
192 		switch (code) {
193 		case ICMP6_PARAMPROB_HEADER:
194 			stat->icp6errs_paramprob_header++;
195 			return;
196 		case ICMP6_PARAMPROB_NEXTHEADER:
197 			stat->icp6errs_paramprob_nextheader++;
198 			return;
199 		case ICMP6_PARAMPROB_OPTION:
200 			stat->icp6errs_paramprob_option++;
201 			return;
202 		}
203 		break;
204 	case ND_REDIRECT:
205 		stat->icp6errs_redirect++;
206 		return;
207 	}
208 	stat->icp6errs_unknown++;
209 }
210 
211 /*
212  * Register a Path MTU Discovery callback.
213  */
214 void
215 icmp6_mtudisc_callback_register(void (*func)(struct sockaddr_in6 *, u_int))
216 {
217 	struct icmp6_mtudisc_callback *mc;
218 
219 	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
220 	     mc = LIST_NEXT(mc, mc_list)) {
221 		if (mc->mc_func == func)
222 			return;
223 	}
224 
225 	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
226 	if (mc == NULL)
227 		panic("icmp6_mtudisc_callback_register");
228 
229 	mc->mc_func = func;
230 	LIST_INSERT_HEAD(&icmp6_mtudisc_callbacks, mc, mc_list);
231 }
232 
233 /*
234  * Generate an error packet of type error in response to bad IP6 packet.
235  */
236 void
237 icmp6_error(struct mbuf *m, int type, int code, int param)
238 {
239 	struct ip6_hdr *oip6, *nip6;
240 	struct icmp6_hdr *icmp6;
241 	u_int preplen;
242 	int off;
243 	int nxt;
244 
245 	icmp6stat.icp6s_error++;
246 
247 	/* count per-type-code statistics */
248 	icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code);
249 
250 	if (m->m_len < sizeof(struct ip6_hdr)) {
251 		m = m_pullup(m, sizeof(struct ip6_hdr));
252 		if (m == NULL)
253 			return;
254 	}
255 	oip6 = mtod(m, struct ip6_hdr *);
256 
257 	/*
258 	 * If the destination address of the erroneous packet is a multicast
259 	 * address, or the packet was sent using link-layer multicast,
260 	 * we should basically suppress sending an error (RFC 2463, Section
261 	 * 2.4).
262 	 * We have two exceptions (the item e.2 in that section):
263 	 * - the Packet Too Big message can be sent for path MTU discovery.
264 	 * - the Parameter Problem Message that can be allowed an icmp6 error
265 	 *   in the option type field.  This check has been done in
266 	 *   ip6_unknown_opt(), so we can just check the type and code.
267 	 */
268 	if ((m->m_flags & (M_BCAST|M_MCAST) ||
269 	     IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
270 	    (type != ICMP6_PACKET_TOO_BIG &&
271 	     (type != ICMP6_PARAM_PROB ||
272 	      code != ICMP6_PARAMPROB_OPTION)))
273 		goto freeit;
274 
275 	/*
276 	 * RFC 2463, 2.4 (e.5): source address check.
277 	 * XXX: the case of anycast source?
278 	 */
279 	if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
280 	    IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
281 		goto freeit;
282 
283 	/*
284 	 * If we are about to send ICMPv6 against ICMPv6 error/redirect,
285 	 * don't do it.
286 	 */
287 	nxt = -1;
288 	off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
289 	if (off >= 0 && nxt == IPPROTO_ICMPV6) {
290 		struct icmp6_hdr *icp;
291 
292 		IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
293 			sizeof(*icp));
294 		if (icp == NULL) {
295 			icmp6stat.icp6s_tooshort++;
296 			return;
297 		}
298 		if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
299 		    icp->icmp6_type == ND_REDIRECT) {
300 			/*
301 			 * ICMPv6 error
302 			 * Special case: for redirect (which is
303 			 * informational) we must not send icmp6 error.
304 			 */
305 			icmp6stat.icp6s_canterror++;
306 			goto freeit;
307 		} else {
308 			/* ICMPv6 informational - send the error */
309 		}
310 	}
311 	else {
312 		/* non-ICMPv6 - send the error */
313 	}
314 
315 	oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */
316 
317 	/* Finally, do rate limitation check. */
318 	if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
319 		icmp6stat.icp6s_toofreq++;
320 		goto freeit;
321 	}
322 
323 	/*
324 	 * OK, ICMP6 can be generated.
325 	 */
326 
327 	if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
328 		m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
329 
330 	preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
331 	M_PREPEND(m, preplen, M_DONTWAIT);
332 	if (m && m->m_len < preplen)
333 		m = m_pullup(m, preplen);
334 	if (m == NULL) {
335 		nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
336 		return;
337 	}
338 
339 	nip6 = mtod(m, struct ip6_hdr *);
340 	nip6->ip6_src  = oip6->ip6_src;
341 	nip6->ip6_dst  = oip6->ip6_dst;
342 
343 	if (IN6_IS_SCOPE_EMBED(&oip6->ip6_src))
344 		oip6->ip6_src.s6_addr16[1] = 0;
345 	if (IN6_IS_SCOPE_EMBED(&oip6->ip6_dst))
346 		oip6->ip6_dst.s6_addr16[1] = 0;
347 
348 	icmp6 = (struct icmp6_hdr *)(nip6 + 1);
349 	icmp6->icmp6_type = type;
350 	icmp6->icmp6_code = code;
351 	icmp6->icmp6_pptr = htonl((u_int32_t)param);
352 
353 	/*
354 	 * icmp6_reflect() is designed to be in the input path.
355 	 * icmp6_error() can be called from both input and outut path,
356 	 * and if we are in output path rcvif could contain bogus value.
357 	 * clear m->m_pkthdr.ph_ifidx for safety, we should have enough
358 	 * scope information in ip header (nip6).
359 	 */
360 	m->m_pkthdr.ph_ifidx = 0;
361 
362 	icmp6stat.icp6s_outhist[type]++;
363 	icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */
364 
365 	return;
366 
367   freeit:
368 	/*
369 	 * If we can't tell wheter or not we can generate ICMP6, free it.
370 	 */
371 	m_freem(m);
372 }
373 
374 /*
375  * Process a received ICMP6 message.
376  */
377 int
378 icmp6_input(struct mbuf **mp, int *offp, int proto)
379 {
380 #if NCARP > 0
381 	struct ifnet *ifp;
382 #endif
383 	struct mbuf *m = *mp, *n;
384 	struct ip6_hdr *ip6, *nip6;
385 	struct icmp6_hdr *icmp6, *nicmp6;
386 	int off = *offp;
387 	int icmp6len = m->m_pkthdr.len - *offp;
388 	int code, sum, noff;
389 	char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
390 
391 	/*
392 	 * Locate icmp6 structure in mbuf, and check
393 	 * that not corrupted and of at least minimum length
394 	 */
395 
396 	ip6 = mtod(m, struct ip6_hdr *);
397 	if (icmp6len < sizeof(struct icmp6_hdr)) {
398 		icmp6stat.icp6s_tooshort++;
399 		goto freeit;
400 	}
401 
402 	/*
403 	 * calculate the checksum
404 	 */
405 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
406 	if (icmp6 == NULL) {
407 		icmp6stat.icp6s_tooshort++;
408 		return IPPROTO_DONE;
409 	}
410 	code = icmp6->icmp6_code;
411 
412 	if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
413 		nd6log((LOG_ERR,
414 		    "ICMP6 checksum error(%d|%x) %s\n",
415 		    icmp6->icmp6_type, sum,
416 		    inet_ntop(AF_INET6, &ip6->ip6_src, src, sizeof(src))));
417 		icmp6stat.icp6s_checksum++;
418 		goto freeit;
419 	}
420 
421 #if NPF > 0
422 	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
423 		switch (icmp6->icmp6_type) {
424 		/*
425 		 * These ICMP6 types map to other connections.  They must be
426 		 * delivered to pr_ctlinput() also for diverted connections.
427 		 */
428 		case ICMP6_DST_UNREACH:
429 		case ICMP6_PACKET_TOO_BIG:
430 		case ICMP6_TIME_EXCEEDED:
431 		case ICMP6_PARAM_PROB:
432 			break;
433 		default:
434 			goto raw;
435 		}
436 	}
437 #endif /* NPF */
438 
439 #if NCARP > 0
440 	ifp = if_get(m->m_pkthdr.ph_ifidx);
441 	if (ifp == NULL)
442 		goto freeit;
443 
444 	if (ifp->if_type == IFT_CARP &&
445 	    icmp6->icmp6_type == ICMP6_ECHO_REQUEST &&
446 	    carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32,
447 	    ip6->ip6_dst.s6_addr32)) {
448 		if_put(ifp);
449 		goto freeit;
450 	}
451 
452 	if_put(ifp);
453 #endif
454 	icmp6stat.icp6s_inhist[icmp6->icmp6_type]++;
455 
456 	switch (icmp6->icmp6_type) {
457 	case ICMP6_DST_UNREACH:
458 		switch (code) {
459 		case ICMP6_DST_UNREACH_NOROUTE:
460 			code = PRC_UNREACH_NET;
461 			break;
462 		case ICMP6_DST_UNREACH_ADMIN:
463 			code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
464 			break;
465 		case ICMP6_DST_UNREACH_ADDR:
466 			code = PRC_HOSTDEAD;
467 			break;
468 #ifdef COMPAT_RFC1885
469 		case ICMP6_DST_UNREACH_NOTNEIGHBOR:
470 			code = PRC_UNREACH_SRCFAIL;
471 			break;
472 #else
473 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
474 			/* I mean "source address was incorrect." */
475 			code = PRC_PARAMPROB;
476 			break;
477 #endif
478 		case ICMP6_DST_UNREACH_NOPORT:
479 			code = PRC_UNREACH_PORT;
480 			break;
481 		default:
482 			goto badcode;
483 		}
484 		goto deliver;
485 
486 	case ICMP6_PACKET_TOO_BIG:
487 		/* MTU is checked in icmp6_mtudisc_update. */
488 		code = PRC_MSGSIZE;
489 
490 		/*
491 		 * Updating the path MTU will be done after examining
492 		 * intermediate extension headers.
493 		 */
494 		goto deliver;
495 
496 	case ICMP6_TIME_EXCEEDED:
497 		switch (code) {
498 		case ICMP6_TIME_EXCEED_TRANSIT:
499 			code = PRC_TIMXCEED_INTRANS;
500 			break;
501 		case ICMP6_TIME_EXCEED_REASSEMBLY:
502 			code = PRC_TIMXCEED_REASS;
503 			break;
504 		default:
505 			goto badcode;
506 		}
507 		goto deliver;
508 
509 	case ICMP6_PARAM_PROB:
510 		switch (code) {
511 		case ICMP6_PARAMPROB_NEXTHEADER:
512 			code = PRC_UNREACH_PROTOCOL;
513 			break;
514 		case ICMP6_PARAMPROB_HEADER:
515 		case ICMP6_PARAMPROB_OPTION:
516 			code = PRC_PARAMPROB;
517 			break;
518 		default:
519 			goto badcode;
520 		}
521 		goto deliver;
522 
523 	case ICMP6_ECHO_REQUEST:
524 		if (code != 0)
525 			goto badcode;
526 		/*
527 		 * Copy mbuf to send to two data paths: userland socket(s),
528 		 * and to the querier (echo reply).
529 		 * m: a copy for socket, n: a copy for querier
530 		 */
531 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
532 			/* Give up local */
533 			n = m;
534 			m = NULL;
535 			goto deliverecho;
536 		}
537 		/*
538 		 * If the first mbuf is shared, or the first mbuf is too short,
539 		 * copy the first part of the data into a fresh mbuf.
540 		 * Otherwise, we will wrongly overwrite both copies.
541 		 */
542 		if ((n->m_flags & M_EXT) != 0 ||
543 		    n->m_len < off + sizeof(struct icmp6_hdr)) {
544 			struct mbuf *n0 = n;
545 			const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
546 
547 			/*
548 			 * Prepare an internal mbuf.  m_pullup() doesn't
549 			 * always copy the length we specified.
550 			 */
551 			if (maxlen >= MCLBYTES) {
552 				/* Give up remote */
553 				m_freem(n0);
554 				break;
555 			}
556 			MGETHDR(n, M_DONTWAIT, n0->m_type);
557 			if (n && maxlen >= MHLEN) {
558 				MCLGET(n, M_DONTWAIT);
559 				if ((n->m_flags & M_EXT) == 0) {
560 					m_free(n);
561 					n = NULL;
562 				}
563 			}
564 			if (n == NULL) {
565 				/* Give up local */
566 				m_freem(n0);
567 				n = m;
568 				m = NULL;
569 				goto deliverecho;
570 			}
571 			M_MOVE_PKTHDR(n, n0);
572 			/*
573 			 * Copy IPv6 and ICMPv6 only.
574 			 */
575 			nip6 = mtod(n, struct ip6_hdr *);
576 			bcopy(ip6, nip6, sizeof(struct ip6_hdr));
577 			nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
578 			bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
579 			noff = sizeof(struct ip6_hdr);
580 			n->m_len = noff + sizeof(struct icmp6_hdr);
581 			/*
582 			 * Adjust mbuf.  ip6_plen will be adjusted in
583 			 * ip6_output().
584 			 * n->m_pkthdr.len == n0->m_pkthdr.len at this point.
585 			 */
586 			n->m_pkthdr.len += noff + sizeof(struct icmp6_hdr);
587 			n->m_pkthdr.len -= (off + sizeof(struct icmp6_hdr));
588 			m_adj(n0, off + sizeof(struct icmp6_hdr));
589 			n->m_next = n0;
590 		} else {
591 	 deliverecho:
592 			IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
593 			    sizeof(*nicmp6));
594 			noff = off;
595 		}
596 		nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
597 		nicmp6->icmp6_code = 0;
598 		if (n) {
599 			icmp6stat.icp6s_reflect++;
600 			icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
601 			icmp6_reflect(n, noff);
602 		}
603 		if (!m)
604 			goto freeit;
605 		break;
606 
607 	case ICMP6_ECHO_REPLY:
608 		if (code != 0)
609 			goto badcode;
610 		break;
611 
612 	case MLD_LISTENER_QUERY:
613 	case MLD_LISTENER_REPORT:
614 		if (icmp6len < sizeof(struct mld_hdr))
615 			goto badlen;
616 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
617 			/* give up local */
618 			mld6_input(m, off);
619 			m = NULL;
620 			goto freeit;
621 		}
622 		mld6_input(n, off);
623 		/* m stays. */
624 		break;
625 
626 	case MLD_LISTENER_DONE:
627 		if (icmp6len < sizeof(struct mld_hdr))	/* necessary? */
628 			goto badlen;
629 		break;		/* nothing to be done in kernel */
630 
631 	case MLD_MTRACE_RESP:
632 	case MLD_MTRACE:
633 		/* XXX: these two are experimental.  not officially defined. */
634 		/* XXX: per-interface statistics? */
635 		break;		/* just pass it to applications */
636 
637 	case ICMP6_WRUREQUEST:	/* ICMP6_FQDN_QUERY */
638 		/* IPv6 Node Information Queries are not supported */
639 		break;
640 	case ICMP6_WRUREPLY:
641 		break;
642 
643 	case ND_ROUTER_SOLICIT:
644 		if (code != 0)
645 			goto badcode;
646 		if (icmp6len < sizeof(struct nd_router_solicit))
647 			goto badlen;
648 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
649 			/* give up local */
650 			nd6_rs_input(m, off, icmp6len);
651 			m = NULL;
652 			goto freeit;
653 		}
654 		nd6_rs_input(n, off, icmp6len);
655 		/* m stays. */
656 		break;
657 
658 	case ND_ROUTER_ADVERT:
659 		if (code != 0)
660 			goto badcode;
661 		if (icmp6len < sizeof(struct nd_router_advert))
662 			goto badlen;
663 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
664 			/* give up local */
665 			nd6_ra_input(m, off, icmp6len);
666 			m = NULL;
667 			goto freeit;
668 		}
669 		nd6_ra_input(n, off, icmp6len);
670 		/* m stays. */
671 		break;
672 
673 	case ND_NEIGHBOR_SOLICIT:
674 		if (code != 0)
675 			goto badcode;
676 		if (icmp6len < sizeof(struct nd_neighbor_solicit))
677 			goto badlen;
678 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
679 			/* give up local */
680 			nd6_ns_input(m, off, icmp6len);
681 			m = NULL;
682 			goto freeit;
683 		}
684 		nd6_ns_input(n, off, icmp6len);
685 		/* m stays. */
686 		break;
687 
688 	case ND_NEIGHBOR_ADVERT:
689 		if (code != 0)
690 			goto badcode;
691 		if (icmp6len < sizeof(struct nd_neighbor_advert))
692 			goto badlen;
693 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
694 			/* give up local */
695 			nd6_na_input(m, off, icmp6len);
696 			m = NULL;
697 			goto freeit;
698 		}
699 		nd6_na_input(n, off, icmp6len);
700 		/* m stays. */
701 		break;
702 
703 	case ND_REDIRECT:
704 		if (code != 0)
705 			goto badcode;
706 		if (icmp6len < sizeof(struct nd_redirect))
707 			goto badlen;
708 		if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
709 			/* give up local */
710 			icmp6_redirect_input(m, off);
711 			m = NULL;
712 			goto freeit;
713 		}
714 		icmp6_redirect_input(n, off);
715 		/* m stays. */
716 		break;
717 
718 	case ICMP6_ROUTER_RENUMBERING:
719 		if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
720 		    code != ICMP6_ROUTER_RENUMBERING_RESULT)
721 			goto badcode;
722 		if (icmp6len < sizeof(struct icmp6_router_renum))
723 			goto badlen;
724 		break;
725 
726 	default:
727 		nd6log((LOG_DEBUG,
728 		    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%u)\n",
729 		    icmp6->icmp6_type,
730 		    inet_ntop(AF_INET6, &ip6->ip6_src, src, sizeof(src)),
731 		    inet_ntop(AF_INET6, &ip6->ip6_dst, dst, sizeof(dst)),
732 		    m->m_pkthdr.ph_ifidx));
733 		if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
734 			/* ICMPv6 error: MUST deliver it by spec... */
735 			code = PRC_NCMDS;
736 			/* deliver */
737 		} else {
738 			/* ICMPv6 informational: MUST not deliver */
739 			break;
740 		}
741 deliver:
742 		if (icmp6_notify_error(m, off, icmp6len, code)) {
743 			/* In this case, m should've been freed. */
744 			return (IPPROTO_DONE);
745 		}
746 		break;
747 
748 badcode:
749 		icmp6stat.icp6s_badcode++;
750 		break;
751 
752 badlen:
753 		icmp6stat.icp6s_badlen++;
754 		break;
755 	}
756 
757 #if NPF > 0
758 raw:
759 #endif
760 	/* deliver the packet to appropriate sockets */
761 	icmp6_rip6_input(&m, *offp);
762 
763 	return IPPROTO_DONE;
764 
765  freeit:
766 	m_freem(m);
767 	return IPPROTO_DONE;
768 }
769 
770 int
771 icmp6_notify_error(struct mbuf *m, int off, int icmp6len, int code)
772 {
773 	struct icmp6_hdr *icmp6;
774 	struct ip6_hdr *eip6;
775 	u_int32_t notifymtu;
776 	struct sockaddr_in6 icmp6src, icmp6dst;
777 
778 	if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
779 		icmp6stat.icp6s_tooshort++;
780 		goto freeit;
781 	}
782 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
783 		       sizeof(*icmp6) + sizeof(struct ip6_hdr));
784 	if (icmp6 == NULL) {
785 		icmp6stat.icp6s_tooshort++;
786 		return (-1);
787 	}
788 	eip6 = (struct ip6_hdr *)(icmp6 + 1);
789 
790 	/* Detect the upper level protocol */
791 	{
792 		void (*ctlfunc)(int, struct sockaddr *, u_int, void *);
793 		u_int8_t nxt = eip6->ip6_nxt;
794 		int eoff = off + sizeof(struct icmp6_hdr) +
795 			sizeof(struct ip6_hdr);
796 		struct ip6ctlparam ip6cp;
797 		struct in6_addr *finaldst = NULL;
798 		int icmp6type = icmp6->icmp6_type;
799 		struct ip6_frag *fh;
800 		struct ip6_rthdr *rth;
801 		struct ip6_rthdr0 *rth0;
802 		int rthlen;
803 
804 		while (1) { /* XXX: should avoid infinite loop explicitly? */
805 			struct ip6_ext *eh;
806 
807 			switch (nxt) {
808 			case IPPROTO_HOPOPTS:
809 			case IPPROTO_DSTOPTS:
810 			case IPPROTO_AH:
811 				IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
812 					       eoff, sizeof(*eh));
813 				if (eh == NULL) {
814 					icmp6stat.icp6s_tooshort++;
815 					return (-1);
816 				}
817 
818 				if (nxt == IPPROTO_AH)
819 					eoff += (eh->ip6e_len + 2) << 2;
820 				else
821 					eoff += (eh->ip6e_len + 1) << 3;
822 				nxt = eh->ip6e_nxt;
823 				break;
824 			case IPPROTO_ROUTING:
825 				/*
826 				 * When the erroneous packet contains a
827 				 * routing header, we should examine the
828 				 * header to determine the final destination.
829 				 * Otherwise, we can't properly update
830 				 * information that depends on the final
831 				 * destination (e.g. path MTU).
832 				 */
833 				IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
834 					       eoff, sizeof(*rth));
835 				if (rth == NULL) {
836 					icmp6stat.icp6s_tooshort++;
837 					return (-1);
838 				}
839 				rthlen = (rth->ip6r_len + 1) << 3;
840 				/*
841 				 * XXX: currently there is no
842 				 * officially defined type other
843 				 * than type-0.
844 				 * Note that if the segment left field
845 				 * is 0, all intermediate hops must
846 				 * have been passed.
847 				 */
848 				if (rth->ip6r_segleft &&
849 				    rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
850 					int hops;
851 
852 					IP6_EXTHDR_GET(rth0,
853 						       struct ip6_rthdr0 *, m,
854 						       eoff, rthlen);
855 					if (rth0 == NULL) {
856 						icmp6stat.icp6s_tooshort++;
857 						return (-1);
858 					}
859 					/* just ignore a bogus header */
860 					if ((rth0->ip6r0_len % 2) == 0 &&
861 					    (hops = rth0->ip6r0_len/2))
862 						finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
863 				}
864 				eoff += rthlen;
865 				nxt = rth->ip6r_nxt;
866 				break;
867 			case IPPROTO_FRAGMENT:
868 				IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
869 					       eoff, sizeof(*fh));
870 				if (fh == NULL) {
871 					icmp6stat.icp6s_tooshort++;
872 					return (-1);
873 				}
874 				/*
875 				 * Data after a fragment header is meaningless
876 				 * unless it is the first fragment, but
877 				 * we'll go to the notify label for path MTU
878 				 * discovery.
879 				 */
880 				if (fh->ip6f_offlg & IP6F_OFF_MASK)
881 					goto notify;
882 
883 				eoff += sizeof(struct ip6_frag);
884 				nxt = fh->ip6f_nxt;
885 				break;
886 			default:
887 				/*
888 				 * This case includes ESP and the No Next
889 				 * Header.  In such cases going to the notify
890 				 * label does not have any meaning
891 				 * (i.e. ctlfunc will be NULL), but we go
892 				 * anyway since we might have to update
893 				 * path MTU information.
894 				 */
895 				goto notify;
896 			}
897 		}
898 	  notify:
899 		IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
900 			       sizeof(*icmp6) + sizeof(struct ip6_hdr));
901 		if (icmp6 == NULL) {
902 			icmp6stat.icp6s_tooshort++;
903 			return (-1);
904 		}
905 
906 		eip6 = (struct ip6_hdr *)(icmp6 + 1);
907 		bzero(&icmp6dst, sizeof(icmp6dst));
908 		icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
909 		icmp6dst.sin6_family = AF_INET6;
910 		if (finaldst == NULL)
911 			icmp6dst.sin6_addr = eip6->ip6_dst;
912 		else
913 			icmp6dst.sin6_addr = *finaldst;
914 		icmp6dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.ph_ifidx,
915 		    &icmp6dst.sin6_addr);
916 		if (in6_embedscope(&icmp6dst.sin6_addr, &icmp6dst, NULL)) {
917 			/* should be impossbile */
918 			nd6log((LOG_DEBUG,
919 			    "icmp6_notify_error: in6_embedscope failed\n"));
920 			goto freeit;
921 		}
922 
923 		/*
924 		 * retrieve parameters from the inner IPv6 header, and convert
925 		 * them into sockaddr structures.
926 		 */
927 		bzero(&icmp6src, sizeof(icmp6src));
928 		icmp6src.sin6_len = sizeof(struct sockaddr_in6);
929 		icmp6src.sin6_family = AF_INET6;
930 		icmp6src.sin6_addr = eip6->ip6_src;
931 		icmp6src.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.ph_ifidx,
932 		    &icmp6src.sin6_addr);
933 		if (in6_embedscope(&icmp6src.sin6_addr, &icmp6src, NULL)) {
934 			/* should be impossbile */
935 			nd6log((LOG_DEBUG,
936 			    "icmp6_notify_error: in6_embedscope failed\n"));
937 			goto freeit;
938 		}
939 		icmp6src.sin6_flowinfo =
940 		    (eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
941 
942 		if (finaldst == NULL)
943 			finaldst = &eip6->ip6_dst;
944 		ip6cp.ip6c_m = m;
945 		ip6cp.ip6c_icmp6 = icmp6;
946 		ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
947 		ip6cp.ip6c_off = eoff;
948 		ip6cp.ip6c_finaldst = finaldst;
949 		ip6cp.ip6c_src = &icmp6src;
950 		ip6cp.ip6c_nxt = nxt;
951 #if NPF > 0
952 		pf_pkt_addr_changed(m);
953 #endif
954 
955 		if (icmp6type == ICMP6_PACKET_TOO_BIG) {
956 			notifymtu = ntohl(icmp6->icmp6_mtu);
957 			ip6cp.ip6c_cmdarg = (void *)&notifymtu;
958 		}
959 
960 		ctlfunc = inet6sw[ip6_protox[nxt]].pr_ctlinput;
961 		if (ctlfunc)
962 			(*ctlfunc)(code, sin6tosa(&icmp6dst),
963 			    m->m_pkthdr.ph_rtableid, &ip6cp);
964 	}
965 	return (0);
966 
967   freeit:
968 	m_freem(m);
969 	return (-1);
970 }
971 
972 void
973 icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
974 {
975 	unsigned long rtcount;
976 	struct icmp6_mtudisc_callback *mc;
977 	struct in6_addr *dst = ip6cp->ip6c_finaldst;
978 	struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6;
979 	struct mbuf *m = ip6cp->ip6c_m;	/* will be necessary for scope issue */
980 	u_int mtu = ntohl(icmp6->icmp6_mtu);
981 	struct rtentry *rt = NULL;
982 	struct sockaddr_in6 sin6;
983 
984 	/*
985 	 * The MTU may not be less then the minimal IPv6 MTU except for the
986 	 * hack in ip6_output/ip6_setpmtu where we always include a frag header.
987 	 */
988 	if (mtu < IPV6_MMTU - sizeof(struct ip6_frag))
989 		return;
990 
991 	/*
992 	 * allow non-validated cases if memory is plenty, to make traffic
993 	 * from non-connected pcb happy.
994 	 */
995 	rtcount = rt_timer_queue_count(icmp6_mtudisc_timeout_q);
996 	if (validated) {
997 		if (0 <= icmp6_mtudisc_hiwat && rtcount > icmp6_mtudisc_hiwat)
998 			return;
999 		else if (0 <= icmp6_mtudisc_lowat &&
1000 		    rtcount > icmp6_mtudisc_lowat) {
1001 			/*
1002 			 * XXX nuke a victim, install the new one.
1003 			 */
1004 		}
1005 	} else {
1006 		if (0 <= icmp6_mtudisc_lowat && rtcount > icmp6_mtudisc_lowat)
1007 			return;
1008 	}
1009 
1010 	bzero(&sin6, sizeof(sin6));
1011 	sin6.sin6_family = PF_INET6;
1012 	sin6.sin6_len = sizeof(struct sockaddr_in6);
1013 	sin6.sin6_addr = *dst;
1014 	/* XXX normally, this won't happen */
1015 	if (IN6_IS_ADDR_LINKLOCAL(dst)) {
1016 		sin6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.ph_ifidx);
1017 	}
1018 	sin6.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.ph_ifidx,
1019 	    &sin6.sin6_addr);
1020 
1021 	rt = icmp6_mtudisc_clone(sin6tosa(&sin6), m->m_pkthdr.ph_rtableid);
1022 
1023 	if (rt != NULL && ISSET(rt->rt_flags, RTF_HOST) &&
1024 	    !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
1025 	    (rt->rt_rmx.rmx_mtu > mtu || rt->rt_rmx.rmx_mtu == 0)) {
1026 	    	struct ifnet *ifp;
1027 
1028 	    	ifp = if_get(rt->rt_ifidx);
1029 		if (ifp != NULL && mtu < ifp->if_mtu) {
1030 			icmp6stat.icp6s_pmtuchg++;
1031 			rt->rt_rmx.rmx_mtu = mtu;
1032 		}
1033 		if_put(ifp);
1034 	}
1035 	rtfree(rt);
1036 
1037 	/*
1038 	 * Notify protocols that the MTU for this destination
1039 	 * has changed.
1040 	 */
1041 	for (mc = LIST_FIRST(&icmp6_mtudisc_callbacks); mc != NULL;
1042 	     mc = LIST_NEXT(mc, mc_list))
1043 		(*mc->mc_func)(&sin6, m->m_pkthdr.ph_rtableid);
1044 }
1045 
1046 /*
1047  * XXX almost dup'ed code with rip6_input.
1048  */
1049 int
1050 icmp6_rip6_input(struct mbuf **mp, int off)
1051 {
1052 	struct mbuf *m = *mp;
1053 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1054 	struct inpcb *in6p;
1055 	struct inpcb *last = NULL;
1056 	struct sockaddr_in6 rip6src;
1057 	struct icmp6_hdr *icmp6;
1058 	struct mbuf *opts = NULL;
1059 
1060 	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
1061 	if (icmp6 == NULL) {
1062 		/* m is already reclaimed */
1063 		return IPPROTO_DONE;
1064 	}
1065 
1066 	bzero(&rip6src, sizeof(rip6src));
1067 	rip6src.sin6_len = sizeof(struct sockaddr_in6);
1068 	rip6src.sin6_family = AF_INET6;
1069 	/* KAME hack: recover scopeid */
1070 	in6_recoverscope(&rip6src, &ip6->ip6_src);
1071 
1072 	TAILQ_FOREACH(in6p, &rawin6pcbtable.inpt_queue, inp_queue) {
1073 		if (!(in6p->inp_flags & INP_IPV6))
1074 			continue;
1075 		if (in6p->inp_ipv6.ip6_nxt != IPPROTO_ICMPV6)
1076 			continue;
1077 #if NPF > 0
1078 		if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
1079 			struct pf_divert *divert;
1080 
1081 			/* XXX rdomain support */
1082 			if ((divert = pf_find_divert(m)) == NULL)
1083 				continue;
1084 			if (IN6_IS_ADDR_UNSPECIFIED(&divert->addr.v6))
1085 				goto divert_reply;
1086 			if (!IN6_ARE_ADDR_EQUAL(&in6p->inp_laddr6,
1087 			    &divert->addr.v6))
1088 				continue;
1089 		} else
1090  divert_reply:
1091 #endif
1092 		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->inp_laddr6) &&
1093 		   !IN6_ARE_ADDR_EQUAL(&in6p->inp_laddr6, &ip6->ip6_dst))
1094 			continue;
1095 		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->inp_faddr6) &&
1096 		   !IN6_ARE_ADDR_EQUAL(&in6p->inp_faddr6, &ip6->ip6_src))
1097 			continue;
1098 		if (in6p->inp_icmp6filt
1099 		    && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
1100 				 in6p->inp_icmp6filt))
1101 			continue;
1102 		if (last) {
1103 			struct	mbuf *n;
1104 			if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
1105 				if (last->inp_flags & IN6P_CONTROLOPTS)
1106 					ip6_savecontrol(last, n, &opts);
1107 				/* strip intermediate headers */
1108 				m_adj(n, off);
1109 				if (sbappendaddr(&last->inp_socket->so_rcv,
1110 				    sin6tosa(&rip6src), n, opts) == 0) {
1111 					/* should notify about lost packet */
1112 					m_freem(n);
1113 					m_freem(opts);
1114 				} else
1115 					sorwakeup(last->inp_socket);
1116 				opts = NULL;
1117 			}
1118 		}
1119 		last = in6p;
1120 	}
1121 	if (last) {
1122 		if (last->inp_flags & IN6P_CONTROLOPTS)
1123 			ip6_savecontrol(last, m, &opts);
1124 		/* strip intermediate headers */
1125 		m_adj(m, off);
1126 		if (sbappendaddr(&last->inp_socket->so_rcv,
1127 		    sin6tosa(&rip6src), m, opts) == 0) {
1128 			m_freem(m);
1129 			m_freem(opts);
1130 		} else
1131 			sorwakeup(last->inp_socket);
1132 	} else {
1133 		m_freem(m);
1134 		ip6stat.ip6s_delivered--;
1135 	}
1136 	return IPPROTO_DONE;
1137 }
1138 
1139 /*
1140  * Reflect the ip6 packet back to the source.
1141  * OFF points to the icmp6 header, counted from the top of the mbuf.
1142  *
1143  * Note: RFC 1885 required that an echo reply should be truncated if it
1144  * did not fit in with (return) path MTU, and KAME code supported the
1145  * behavior.  However, as a clarification after the RFC, this limitation
1146  * was removed in a revised version of the spec, RFC 2463.  We had kept the
1147  * old behavior, with a (non-default) ifdef block, while the new version of
1148  * the spec was an internet-draft status, and even after the new RFC was
1149  * published.  But it would rather make sense to clean the obsoleted part
1150  * up, and to make the code simpler at this stage.
1151  */
1152 void
1153 icmp6_reflect(struct mbuf *m, size_t off)
1154 {
1155 	struct rtentry *rt = NULL;
1156 	struct ip6_hdr *ip6;
1157 	struct icmp6_hdr *icmp6;
1158 	struct in6_ifaddr *ia6;
1159 	struct in6_addr t, *src = NULL;
1160 	struct sockaddr_in6 sa6_src, sa6_dst;
1161 
1162 	/* too short to reflect */
1163 	if (off < sizeof(struct ip6_hdr)) {
1164 		nd6log((LOG_DEBUG,
1165 		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
1166 		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
1167 		    __FILE__, __LINE__));
1168 		goto bad;
1169 	}
1170 
1171 	/*
1172 	 * If there are extra headers between IPv6 and ICMPv6, strip
1173 	 * off that header first.
1174 	 */
1175 #ifdef DIAGNOSTIC
1176 	if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
1177 		panic("assumption failed in icmp6_reflect");
1178 #endif
1179 	if (off > sizeof(struct ip6_hdr)) {
1180 		size_t l;
1181 		struct ip6_hdr nip6;
1182 
1183 		l = off - sizeof(struct ip6_hdr);
1184 		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
1185 		m_adj(m, l);
1186 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
1187 		if (m->m_len < l) {
1188 			if ((m = m_pullup(m, l)) == NULL)
1189 				return;
1190 		}
1191 		bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
1192 	} else /* off == sizeof(struct ip6_hdr) */ {
1193 		size_t l;
1194 		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
1195 		if (m->m_len < l) {
1196 			if ((m = m_pullup(m, l)) == NULL)
1197 				return;
1198 		}
1199 	}
1200 	ip6 = mtod(m, struct ip6_hdr *);
1201 	ip6->ip6_nxt = IPPROTO_ICMPV6;
1202 	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
1203 
1204 	t = ip6->ip6_dst;
1205 	/*
1206 	 * ip6_input() drops a packet if its src is multicast.
1207 	 * So, the src is never multicast.
1208 	 */
1209 	ip6->ip6_dst = ip6->ip6_src;
1210 
1211 	/*
1212 	 * XXX: make sure to embed scope zone information, using
1213 	 * already embedded IDs or the received interface (if any).
1214 	 * Note that rcvif may be NULL.
1215 	 * TODO: scoped routing case (XXX).
1216 	 */
1217 	bzero(&sa6_src, sizeof(sa6_src));
1218 	sa6_src.sin6_family = AF_INET6;
1219 	sa6_src.sin6_len = sizeof(sa6_src);
1220 	sa6_src.sin6_addr = ip6->ip6_dst;
1221 	in6_recoverscope(&sa6_src, &ip6->ip6_dst);
1222 	in6_embedscope(&ip6->ip6_dst, &sa6_src, NULL);
1223 	bzero(&sa6_dst, sizeof(sa6_dst));
1224 	sa6_dst.sin6_family = AF_INET6;
1225 	sa6_dst.sin6_len = sizeof(sa6_dst);
1226 	sa6_dst.sin6_addr = t;
1227 	in6_recoverscope(&sa6_dst, &t);
1228 	in6_embedscope(&t, &sa6_dst, NULL);
1229 
1230 	/*
1231 	 * If the incoming packet was addressed directly to us (i.e. unicast),
1232 	 * use dst as the src for the reply.
1233 	 * The IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED case would be VERY rare,
1234 	 * but is possible (for example) when we encounter an error while
1235 	 * forwarding procedure destined to a duplicated address of ours.
1236 	 */
1237 	TAILQ_FOREACH(ia6, &in6_ifaddr, ia_list)
1238 		if (IN6_ARE_ADDR_EQUAL(&t, &ia6->ia_addr.sin6_addr) &&
1239 		    (ia6->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_TENTATIVE|
1240 		    IN6_IFF_DUPLICATED)) == 0) {
1241 			src = &t;
1242 			break;
1243 		}
1244 	if (ia6 == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) {
1245 		/*
1246 		 * This is the case if the dst is our link-local address
1247 		 * and the sender is also ourselves.
1248 		 */
1249 		src = &t;
1250 	}
1251 
1252 	if (src == NULL) {
1253 		/*
1254 		 * This case matches to multicasts, our anycast, or unicasts
1255 		 * that we do not own.  Select a source address based on the
1256 		 * source address of the erroneous packet.
1257 		 */
1258 		rt = rtalloc(sin6tosa(&sa6_src), RT_RESOLVE,
1259 		    m->m_pkthdr.ph_rtableid);
1260 		if (!rtisvalid(rt)) {
1261 			char addr[INET6_ADDRSTRLEN];
1262 
1263 			nd6log((LOG_DEBUG,
1264 			    "%s: source can't be determined: dst=%s\n",
1265 			    __func__, inet_ntop(AF_INET6, &sa6_src.sin6_addr,
1266 				addr, sizeof(addr))));
1267 			rtfree(rt);
1268 			goto bad;
1269 		}
1270 		src = &ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
1271 	}
1272 
1273 	ip6->ip6_src = *src;
1274 	rtfree(rt);
1275 
1276 	ip6->ip6_flow = 0;
1277 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1278 	ip6->ip6_vfc |= IPV6_VERSION;
1279 	ip6->ip6_nxt = IPPROTO_ICMPV6;
1280 	ip6->ip6_hlim = ip6_defhlim;
1281 
1282 	icmp6->icmp6_cksum = 0;
1283 	m->m_pkthdr.csum_flags = M_ICMP_CSUM_OUT;
1284 
1285 	/*
1286 	 * XXX option handling
1287 	 */
1288 
1289 	m->m_flags &= ~(M_BCAST|M_MCAST);
1290 
1291 	/*
1292 	 * To avoid a "too big" situation at an intermediate router
1293 	 * and the path MTU discovery process, specify the IPV6_MINMTU flag.
1294 	 * Note that only echo and node information replies are affected,
1295 	 * since the length of ICMP6 errors is limited to the minimum MTU.
1296 	 */
1297 #if NPF > 0
1298 	pf_pkt_addr_changed(m);
1299 #endif
1300 	ip6_send(m);
1301 	return;
1302 
1303  bad:
1304 	m_freem(m);
1305 	return;
1306 }
1307 
/*
 * ICMPv6 fast timer entry point; it only forwards to mld6_fasttimeo().
 */
void
icmp6_fasttimo(void)
{
	mld6_fasttimeo();
}
1314 
/*
 * Format the three addresses of a redirect as "(src=... dst=... tgt=...)"
 * for log messages.
 *
 * The result lives in a single static buffer: it is overwritten by the
 * next call and must not be freed by the caller.
 */
const char *
icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6,
    struct in6_addr *tgt6)
{
	static char diag[1024]; /* XXX */
	char srcbuf[INET6_ADDRSTRLEN];
	char dstbuf[INET6_ADDRSTRLEN];
	char tgtbuf[INET6_ADDRSTRLEN];
	const char *srcstr, *dststr, *tgtstr;

	/* each address gets its own scratch buffer */
	srcstr = inet_ntop(AF_INET6, src6, srcbuf, sizeof(srcbuf));
	dststr = inet_ntop(AF_INET6, dst6, dstbuf, sizeof(dstbuf));
	tgtstr = inet_ntop(AF_INET6, tgt6, tgtbuf, sizeof(tgtbuf));

	snprintf(diag, sizeof(diag), "(src=%s dst=%s tgt=%s)",
	    srcstr, dststr, tgtstr);
	return diag;
}
1330 
1331 void
1332 icmp6_redirect_input(struct mbuf *m, int off)
1333 {
1334 	struct ifnet *ifp;
1335 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1336 	struct nd_redirect *nd_rd;
1337 	int icmp6len = ntohs(ip6->ip6_plen);
1338 	char *lladdr = NULL;
1339 	int lladdrlen = 0;
1340 	struct rtentry *rt = NULL;
1341 	int is_router;
1342 	int is_onlink;
1343 	struct in6_addr src6 = ip6->ip6_src;
1344 	struct in6_addr redtgt6;
1345 	struct in6_addr reddst6;
1346 	union nd_opts ndopts;
1347 	char addr[INET6_ADDRSTRLEN];
1348 
1349 	ifp = if_get(m->m_pkthdr.ph_ifidx);
1350 	if (ifp == NULL)
1351 		return;
1352 
1353 	/* XXX if we are router, we don't update route by icmp6 redirect */
1354 	if (ip6_forwarding)
1355 		goto freeit;
1356 	if (!(ifp->if_xflags & IFXF_AUTOCONF6))
1357 		goto freeit;
1358 
1359 	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
1360 	if (nd_rd == NULL) {
1361 		icmp6stat.icp6s_tooshort++;
1362 		if_put(ifp);
1363 		return;
1364 	}
1365 	redtgt6 = nd_rd->nd_rd_target;
1366 	reddst6 = nd_rd->nd_rd_dst;
1367 
1368 	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
1369 		redtgt6.s6_addr16[1] = htons(ifp->if_index);
1370 	if (IN6_IS_ADDR_LINKLOCAL(&reddst6))
1371 		reddst6.s6_addr16[1] = htons(ifp->if_index);
1372 
1373 	/* validation */
1374 	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
1375 		nd6log((LOG_ERR,
1376 			"ICMP6 redirect sent from %s rejected; "
1377 			"must be from linklocal\n",
1378 			inet_ntop(AF_INET6, &src6, addr, sizeof(addr))));
1379 		goto bad;
1380 	}
1381 	if (ip6->ip6_hlim != 255) {
1382 		nd6log((LOG_ERR,
1383 			"ICMP6 redirect sent from %s rejected; "
1384 			"hlim=%d (must be 255)\n",
1385 			inet_ntop(AF_INET6, &src6, addr, sizeof(addr)),
1386 			ip6->ip6_hlim));
1387 		goto bad;
1388 	}
1389 	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
1390 		nd6log((LOG_ERR,
1391 			"ICMP6 redirect rejected; "
1392 			"redirect dst must be unicast: %s\n",
1393 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1394 		goto bad;
1395 	}
1396     {
1397 	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
1398 	struct sockaddr_in6 sin6;
1399 	struct in6_addr *gw6;
1400 
1401 	bzero(&sin6, sizeof(sin6));
1402 	sin6.sin6_family = AF_INET6;
1403 	sin6.sin6_len = sizeof(struct sockaddr_in6);
1404 	bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
1405 	rt = rtalloc(sin6tosa(&sin6), 0, m->m_pkthdr.ph_rtableid);
1406 	if (rt) {
1407 		if (rt->rt_gateway == NULL ||
1408 		    rt->rt_gateway->sa_family != AF_INET6) {
1409 			nd6log((LOG_ERR,
1410 			    "ICMP6 redirect rejected; no route "
1411 			    "with inet6 gateway found for redirect dst: %s\n",
1412 			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1413 			rtfree(rt);
1414 			goto bad;
1415 		}
1416 
1417 		gw6 = &(satosin6(rt->rt_gateway)->sin6_addr);
1418 		if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
1419 			nd6log((LOG_ERR,
1420 				"ICMP6 redirect rejected; "
1421 				"not equal to gw-for-src=%s (must be same): "
1422 				"%s\n",
1423 				inet_ntop(AF_INET6, gw6, addr, sizeof(addr)),
1424 				icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1425 			rtfree(rt);
1426 			goto bad;
1427 		}
1428 	} else {
1429 		nd6log((LOG_ERR,
1430 			"ICMP6 redirect rejected; "
1431 			"no route found for redirect dst: %s\n",
1432 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1433 		goto bad;
1434 	}
1435 	rtfree(rt);
1436 	rt = NULL;
1437     }
1438 
1439 	is_router = is_onlink = 0;
1440 	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
1441 		is_router = 1;	/* router case */
1442 	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
1443 		is_onlink = 1;	/* on-link destination case */
1444 	if (!is_router && !is_onlink) {
1445 		nd6log((LOG_ERR,
1446 			"ICMP6 redirect rejected; "
1447 			"neither router case nor onlink case: %s\n",
1448 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1449 		goto bad;
1450 	}
1451 	/* validation passed */
1452 
1453 	icmp6len -= sizeof(*nd_rd);
1454 	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
1455 	if (nd6_options(&ndopts) < 0) {
1456 		nd6log((LOG_INFO, "icmp6_redirect_input: "
1457 			"invalid ND option, rejected: %s\n",
1458 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1459 		/* nd6_options have incremented stats */
1460 		goto freeit;
1461 	}
1462 
1463 	if (ndopts.nd_opts_tgt_lladdr) {
1464 		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
1465 		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
1466 	}
1467 
1468 	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
1469 		nd6log((LOG_INFO,
1470 			"icmp6_redirect_input: lladdrlen mismatch for %s "
1471 			"(if %d, icmp6 packet %d): %s\n",
1472 			inet_ntop(AF_INET6, &redtgt6, addr, sizeof(addr)),
1473 			ifp->if_addrlen, lladdrlen - 2,
1474 			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
1475 		goto bad;
1476 	}
1477 
1478 	/* RFC 2461 8.3 */
1479 	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
1480 			 is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
1481 
1482 	if (!is_onlink) {	/* better router case.  perform rtredirect. */
1483 		/* perform rtredirect */
1484 		struct sockaddr_in6 sdst;
1485 		struct sockaddr_in6 sgw;
1486 		struct sockaddr_in6 ssrc;
1487 		unsigned long rtcount;
1488 		struct rtentry *newrt = NULL;
1489 
1490 		/*
1491 		 * do not install redirect route, if the number of entries
1492 		 * is too much (> hiwat).  note that, the node (= host) will
1493 		 * work just fine even if we do not install redirect route
1494 		 * (there will be additional hops, though).
1495 		 */
1496 		rtcount = rt_timer_queue_count(icmp6_redirect_timeout_q);
1497 		if (0 <= ip6_maxdynroutes && rtcount >= ip6_maxdynroutes)
1498 			goto freeit;
1499 		else if (0 <= icmp6_redirect_lowat &&
1500 		    rtcount > icmp6_redirect_lowat) {
1501 			/*
1502 			 * XXX nuke a victim, install the new one.
1503 			 */
1504 		}
1505 
1506 		bzero(&sdst, sizeof(sdst));
1507 		bzero(&sgw, sizeof(sgw));
1508 		bzero(&ssrc, sizeof(ssrc));
1509 		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
1510 		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
1511 			sizeof(struct sockaddr_in6);
1512 		bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
1513 		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
1514 		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
1515 		rtredirect(sin6tosa(&sdst), sin6tosa(&sgw), sin6tosa(&ssrc),
1516 		    &newrt, m->m_pkthdr.ph_rtableid);
1517 
1518 		if (newrt) {
1519 			(void)rt_timer_add(newrt, icmp6_redirect_timeout,
1520 			    icmp6_redirect_timeout_q, m->m_pkthdr.ph_rtableid);
1521 			rtfree(newrt);
1522 		}
1523 	}
1524 	/* finally update cached route in each socket via pfctlinput */
1525 	{
1526 		struct sockaddr_in6 sdst;
1527 
1528 		bzero(&sdst, sizeof(sdst));
1529 		sdst.sin6_family = AF_INET6;
1530 		sdst.sin6_len = sizeof(struct sockaddr_in6);
1531 		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
1532 		pfctlinput(PRC_REDIRECT_HOST, sin6tosa(&sdst));
1533 	}
1534 
1535  freeit:
1536 	if_put(ifp);
1537 	m_freem(m);
1538 	return;
1539 
1540  bad:
1541 	if_put(ifp);
1542 	icmp6stat.icp6s_badredirect++;
1543 	m_freem(m);
1544 }
1545 
1546 void
1547 icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
1548 {
1549 	struct ifnet *ifp = NULL;
1550 	struct in6_addr *ifp_ll6;
1551 	struct in6_addr *nexthop;
1552 	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
1553 	struct mbuf *m = NULL;	/* newly allocated one */
1554 	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
1555 	struct nd_redirect *nd_rd;
1556 	size_t maxlen;
1557 	u_char *p;
1558 	struct sockaddr_in6 src_sa;
1559 
1560 	icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
1561 
1562 	/* if we are not router, we don't send icmp6 redirect */
1563 	if (!ip6_forwarding)
1564 		goto fail;
1565 
1566 	/* sanity check */
1567 	if (m0 == NULL || !rtisvalid(rt))
1568 		goto fail;
1569 
1570 	ifp = if_get(rt->rt_ifidx);
1571 	if (ifp == NULL)
1572 		goto fail;
1573 
1574 	/*
1575 	 * Address check:
1576 	 *  the source address must identify a neighbor, and
1577 	 *  the destination address must not be a multicast address
1578 	 *  [RFC 2461, sec 8.2]
1579 	 */
1580 	sip6 = mtod(m0, struct ip6_hdr *);
1581 	bzero(&src_sa, sizeof(src_sa));
1582 	src_sa.sin6_family = AF_INET6;
1583 	src_sa.sin6_len = sizeof(src_sa);
1584 	src_sa.sin6_addr = sip6->ip6_src;
1585 	/* we don't currently use sin6_scope_id, but eventually use it */
1586 	src_sa.sin6_scope_id = in6_addr2scopeid(ifp->if_index, &sip6->ip6_src);
1587 	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
1588 		goto fail;
1589 	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
1590 		goto fail;	/* what should we do here? */
1591 
1592 	/* rate limit */
1593 	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
1594 		goto fail;
1595 
1596 	/*
1597 	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
1598 	 * we almost always ask for an mbuf cluster for simplicity.
1599 	 * (MHLEN < IPV6_MMTU is almost always true)
1600 	 */
1601 #if IPV6_MMTU >= MCLBYTES
1602 # error assumption failed about IPV6_MMTU and MCLBYTES
1603 #endif
1604 	MGETHDR(m, M_DONTWAIT, MT_HEADER);
1605 	if (m && IPV6_MMTU >= MHLEN)
1606 		MCLGET(m, M_DONTWAIT);
1607 	if (!m)
1608 		goto fail;
1609 	m->m_pkthdr.ph_ifidx = 0;
1610 	m->m_len = 0;
1611 	maxlen = M_TRAILINGSPACE(m);
1612 	maxlen = min(IPV6_MMTU, maxlen);
1613 	/* just for safety */
1614 	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
1615 	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
1616 		goto fail;
1617 	}
1618 
1619 	{
1620 		/* get ip6 linklocal address for ifp(my outgoing interface). */
1621 		struct in6_ifaddr *ia6;
1622 		if ((ia6 = in6ifa_ifpforlinklocal(ifp, IN6_IFF_TENTATIVE|
1623 		    IN6_IFF_DUPLICATED|IN6_IFF_ANYCAST)) == NULL)
1624 			goto fail;
1625 		ifp_ll6 = &ia6->ia_addr.sin6_addr;
1626 	}
1627 
1628 	/* get ip6 linklocal address for the router. */
1629 	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
1630 		struct sockaddr_in6 *sin6;
1631 		sin6 = satosin6(rt->rt_gateway);
1632 		nexthop = &sin6->sin6_addr;
1633 		if (!IN6_IS_ADDR_LINKLOCAL(nexthop))
1634 			nexthop = NULL;
1635 	} else
1636 		nexthop = NULL;
1637 
1638 	/* ip6 */
1639 	ip6 = mtod(m, struct ip6_hdr *);
1640 	ip6->ip6_flow = 0;
1641 	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1642 	ip6->ip6_vfc |= IPV6_VERSION;
1643 	/* ip6->ip6_plen will be set later */
1644 	ip6->ip6_nxt = IPPROTO_ICMPV6;
1645 	ip6->ip6_hlim = 255;
1646 	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
1647 	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
1648 	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
1649 
1650 	/* ND Redirect */
1651 	nd_rd = (struct nd_redirect *)(ip6 + 1);
1652 	nd_rd->nd_rd_type = ND_REDIRECT;
1653 	nd_rd->nd_rd_code = 0;
1654 	nd_rd->nd_rd_reserved = 0;
1655 	if (rt->rt_flags & RTF_GATEWAY) {
1656 		/*
1657 		 * nd_rd->nd_rd_target must be a link-local address in
1658 		 * better router cases.
1659 		 */
1660 		if (!nexthop)
1661 			goto fail;
1662 		bcopy(nexthop, &nd_rd->nd_rd_target,
1663 		      sizeof(nd_rd->nd_rd_target));
1664 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
1665 		      sizeof(nd_rd->nd_rd_dst));
1666 	} else {
1667 		/* make sure redtgt == reddst */
1668 		nexthop = &sip6->ip6_dst;
1669 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
1670 		      sizeof(nd_rd->nd_rd_target));
1671 		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
1672 		      sizeof(nd_rd->nd_rd_dst));
1673 	}
1674 
1675 	p = (u_char *)(nd_rd + 1);
1676 
1677 	{
1678 		/* target lladdr option */
1679 		struct rtentry *nrt;
1680 		int len;
1681 		struct sockaddr_dl *sdl;
1682 		struct nd_opt_hdr *nd_opt;
1683 		char *lladdr;
1684 
1685 		len = sizeof(*nd_opt) + ifp->if_addrlen;
1686 		len = (len + 7) & ~7;	/* round by 8 */
1687 		/* safety check */
1688 		if (len + (p - (u_char *)ip6) > maxlen)
1689 			goto nolladdropt;
1690 		nrt = nd6_lookup(nexthop, 0, ifp, ifp->if_rdomain);
1691 		if ((nrt != NULL) &&
1692 		    (nrt->rt_flags & (RTF_GATEWAY|RTF_LLINFO)) == RTF_LLINFO &&
1693 		    (nrt->rt_gateway->sa_family == AF_LINK) &&
1694 		    (sdl = satosdl(nrt->rt_gateway)) &&
1695 		    sdl->sdl_alen) {
1696 			nd_opt = (struct nd_opt_hdr *)p;
1697 			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1698 			nd_opt->nd_opt_len = len >> 3;
1699 			lladdr = (char *)(nd_opt + 1);
1700 			bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
1701 			p += len;
1702 		}
1703 		rtfree(nrt);
1704 	}
1705   nolladdropt:;
1706 
1707 	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
1708 
1709 	/* just to be safe */
1710 	if (p - (u_char *)ip6 > maxlen)
1711 		goto noredhdropt;
1712 
1713 	{
1714 		/* redirected header option */
1715 		int len;
1716 		struct nd_opt_rd_hdr *nd_opt_rh;
1717 
1718 		/*
1719 		 * compute the maximum size for icmp6 redirect header option.
1720 		 * XXX room for auth header?
1721 		 */
1722 		len = maxlen - (p - (u_char *)ip6);
1723 		len &= ~7;
1724 
1725 		/*
1726 		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
1727 		 * about padding/truncate rule for the original IP packet.
1728 		 * From the discussion on IPv6imp in Feb 1999,
1729 		 * the consensus was:
1730 		 * - "attach as much as possible" is the goal
1731 		 * - pad if not aligned (original size can be guessed by
1732 		 *   original ip6 header)
1733 		 * Following code adds the padding if it is simple enough,
1734 		 * and truncates if not.
1735 		 */
1736 		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
1737 			/* not enough room, truncate */
1738 			m_adj(m0, (len - sizeof(*nd_opt_rh)) -
1739 			    m0->m_pkthdr.len);
1740 		} else {
1741 			/*
1742 			 * enough room, truncate if not aligned.
1743 			 * we don't pad here for simplicity.
1744 			 */
1745 			size_t extra;
1746 
1747 			extra = m0->m_pkthdr.len % 8;
1748 			if (extra) {
1749 				/* truncate */
1750 				m_adj(m0, -extra);
1751 			}
1752 			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
1753 		}
1754 
1755 		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
1756 		bzero(nd_opt_rh, sizeof(*nd_opt_rh));
1757 		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
1758 		nd_opt_rh->nd_opt_rh_len = len >> 3;
1759 		p += sizeof(*nd_opt_rh);
1760 		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
1761 
1762 		/* connect m0 to m */
1763 		m->m_pkthdr.len += m0->m_pkthdr.len;
1764 		m_cat(m, m0);
1765 		m0 = NULL;
1766 	}
1767 noredhdropt:
1768 	m_freem(m0);
1769 	m0 = NULL;
1770 
1771 	sip6 = mtod(m, struct ip6_hdr *);
1772 	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_src))
1773 		sip6->ip6_src.s6_addr16[1] = 0;
1774 	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_dst))
1775 		sip6->ip6_dst.s6_addr16[1] = 0;
1776 #if 0
1777 	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
1778 		ip6->ip6_src.s6_addr16[1] = 0;
1779 	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))
1780 		ip6->ip6_dst.s6_addr16[1] = 0;
1781 #endif
1782 	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_target))
1783 		nd_rd->nd_rd_target.s6_addr16[1] = 0;
1784 	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_dst))
1785 		nd_rd->nd_rd_dst.s6_addr16[1] = 0;
1786 
1787 	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
1788 
1789 	nd_rd->nd_rd_cksum = 0;
1790 	m->m_pkthdr.csum_flags = M_ICMP_CSUM_OUT;
1791 
1792 	/* send the packet to outside... */
1793 	ip6_output(m, NULL, NULL, 0, NULL, NULL);
1794 
1795 	icmp6stat.icp6s_outhist[ND_REDIRECT]++;
1796 
1797 	if_put(ifp);
1798 	return;
1799 
1800 fail:
1801 	if_put(ifp);
1802 	m_freem(m);
1803 	m_freem(m0);
1804 }
1805 
1806 /*
1807  * ICMPv6 socket option processing.
1808  */
1809 int
1810 icmp6_ctloutput(int op, struct socket *so, int level, int optname,
1811     struct mbuf **mp)
1812 {
1813 	int error = 0;
1814 	struct inpcb *in6p = sotoinpcb(so);
1815 	struct mbuf *m = *mp;
1816 
1817 	if (level != IPPROTO_ICMPV6) {
1818 		if (op == PRCO_SETOPT)
1819 			(void)m_free(m);
1820 		return EINVAL;
1821 	}
1822 
1823 	switch (op) {
1824 	case PRCO_SETOPT:
1825 		switch (optname) {
1826 		case ICMP6_FILTER:
1827 		    {
1828 			struct icmp6_filter *p;
1829 
1830 			if (m == NULL || m->m_len != sizeof(*p)) {
1831 				error = EMSGSIZE;
1832 				break;
1833 			}
1834 			p = mtod(m, struct icmp6_filter *);
1835 			if (!p || !in6p->inp_icmp6filt) {
1836 				error = EINVAL;
1837 				break;
1838 			}
1839 			bcopy(p, in6p->inp_icmp6filt,
1840 				sizeof(struct icmp6_filter));
1841 			error = 0;
1842 			break;
1843 		    }
1844 
1845 		default:
1846 			error = ENOPROTOOPT;
1847 			break;
1848 		}
1849 		m_freem(m);
1850 		break;
1851 
1852 	case PRCO_GETOPT:
1853 		switch (optname) {
1854 		case ICMP6_FILTER:
1855 		    {
1856 			struct icmp6_filter *p;
1857 
1858 			if (!in6p->inp_icmp6filt) {
1859 				error = EINVAL;
1860 				break;
1861 			}
1862 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1863 			m->m_len = sizeof(struct icmp6_filter);
1864 			p = mtod(m, struct icmp6_filter *);
1865 			bcopy(in6p->inp_icmp6filt, p,
1866 				sizeof(struct icmp6_filter));
1867 			error = 0;
1868 			break;
1869 		    }
1870 
1871 		default:
1872 			error = ENOPROTOOPT;
1873 			break;
1874 		}
1875 		break;
1876 	}
1877 
1878 	return (error);
1879 }
1880 
1881 /*
1882  * Perform rate limit check.
1883  * Returns 0 if it is okay to send the icmp6 packet.
1884  * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
1885  * limitation.
1886  *
1887  * XXX per-destination/type check necessary?
1888  *
1889  * dst - not used at this moment
1890  * type - not used at this moment
1891  * code - not used at this moment
1892  */
1893 int
1894 icmp6_ratelimit(const struct in6_addr *dst, const int type, const int code)
1895 {
1896 	/* PPS limit */
1897 	if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count,
1898 	    icmp6errppslim))
1899 		return 1;	/* The packet is subject to rate limit */
1900 	return 0;		/* okay to send */
1901 }
1902 
1903 struct rtentry *
1904 icmp6_mtudisc_clone(struct sockaddr *dst, u_int rdomain)
1905 {
1906 	struct rtentry *rt;
1907 	int    error;
1908 
1909 	rt = rtalloc(dst, RT_RESOLVE, rdomain);
1910 	if (rt == NULL)
1911 		return NULL;
1912 
1913 	/* If we didn't get a host route, allocate one */
1914 	if ((rt->rt_flags & RTF_HOST) == 0) {
1915 		struct rt_addrinfo info;
1916 		struct rtentry *nrt;
1917 		int s;
1918 
1919 		bzero(&info, sizeof(info));
1920 		info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC;
1921 		info.rti_info[RTAX_DST] = dst;
1922 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1923 
1924 		s = splsoftnet();
1925 		error = rtrequest(RTM_ADD, &info, rt->rt_priority, &nrt,
1926 		    rdomain);
1927 		splx(s);
1928 		if (error) {
1929 			rtfree(rt);
1930 			return NULL;
1931 		}
1932 		nrt->rt_rmx = rt->rt_rmx;
1933 		rtfree(rt);
1934 		rt = nrt;
1935 	}
1936 	error = rt_timer_add(rt, icmp6_mtudisc_timeout,
1937 			icmp6_mtudisc_timeout_q, rdomain);
1938 	if (error) {
1939 		rtfree(rt);
1940 		return NULL;
1941 	}
1942 
1943 	return rt;	/* caller need to call rtfree() */
1944 }
1945 
1946 void
1947 icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
1948 {
1949 	struct ifnet *ifp;
1950 	int s;
1951 
1952 	ifp = if_get(rt->rt_ifidx);
1953 	if (ifp == NULL)
1954 		return;
1955 
1956 	if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
1957 		s = splsoftnet();
1958 		rtdeletemsg(rt, ifp, r->rtt_tableid);
1959 		splx(s);
1960 	} else {
1961 		if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
1962 			rt->rt_rmx.rmx_mtu = 0;
1963 	}
1964 
1965 	if_put(ifp);
1966 }
1967 
1968 void
1969 icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r)
1970 {
1971 	struct ifnet *ifp;
1972 	int s;
1973 
1974 	ifp = if_get(rt->rt_ifidx);
1975 	if (ifp == NULL)
1976 		return;
1977 
1978 	if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
1979 		s = splsoftnet();
1980 		rtdeletemsg(rt, ifp, r->rtt_tableid);
1981 		splx(s);
1982 	}
1983 
1984 	if_put(ifp);
1985 }
1986 
1987 int *icmpv6ctl_vars[ICMPV6CTL_MAXID] = ICMPV6CTL_VARS;
1988 
1989 int
1990 icmp6_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1991     void *newp, size_t newlen)
1992 {
1993 	/* All sysctl names at this level are terminal. */
1994 	if (namelen != 1)
1995 		return ENOTDIR;
1996 
1997 	switch (name[0]) {
1998 
1999 	case ICMPV6CTL_STATS:
2000 		return sysctl_rdstruct(oldp, oldlenp, newp,
2001 				&icmp6stat, sizeof(icmp6stat));
2002 	case ICMPV6CTL_ND6_DRLIST:
2003 	case ICMPV6CTL_ND6_PRLIST:
2004 		return nd6_sysctl(name[0], oldp, oldlenp, newp, newlen);
2005 	default:
2006 		if (name[0] < ICMPV6CTL_MAXID)
2007 			return (sysctl_int_arr(icmpv6ctl_vars, name, namelen,
2008 			    oldp, oldlenp, newp, newlen));
2009 		return ENOPROTOOPT;
2010 	}
2011 	/* NOTREACHED */
2012 }
2013