xref: /netbsd/sys/netinet/ip_icmp.c (revision bf9ec67e)
1 /*	$NetBSD: ip_icmp.c,v 1.66 2001/11/13 00:32:37 lukem Exp $	*/
2 
3 /*
4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the project nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to The NetBSD Foundation
37  * by Public Access Networks Corporation ("Panix").  It was developed under
38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39  *
40  * This code is derived from software contributed to The NetBSD Foundation
41  * by Jason R. Thorpe of Zembu Labs, Inc.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice, this list of conditions and the following disclaimer.
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  * 3. All advertising materials mentioning features or use of this software
52  *    must display the following acknowledgement:
53  *	This product includes software developed by the NetBSD
54  *	Foundation, Inc. and its contributors.
55  * 4. Neither the name of The NetBSD Foundation nor the names of its
56  *    contributors may be used to endorse or promote products derived
57  *    from this software without specific prior written permission.
58  *
59  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
60  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
63  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69  * POSSIBILITY OF SUCH DAMAGE.
70  */
71 
72 /*
73  * Copyright (c) 1982, 1986, 1988, 1993
74  *	The Regents of the University of California.  All rights reserved.
75  *
76  * Redistribution and use in source and binary forms, with or without
77  * modification, are permitted provided that the following conditions
78  * are met:
79  * 1. Redistributions of source code must retain the above copyright
80  *    notice, this list of conditions and the following disclaimer.
81  * 2. Redistributions in binary form must reproduce the above copyright
82  *    notice, this list of conditions and the following disclaimer in the
83  *    documentation and/or other materials provided with the distribution.
84  * 3. All advertising materials mentioning features or use of this software
85  *    must display the following acknowledgement:
86  *	This product includes software developed by the University of
87  *	California, Berkeley and its contributors.
88  * 4. Neither the name of the University nor the names of its contributors
89  *    may be used to endorse or promote products derived from this software
90  *    without specific prior written permission.
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102  * SUCH DAMAGE.
103  *
104  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
105  */
106 
107 #include <sys/cdefs.h>
108 __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.66 2001/11/13 00:32:37 lukem Exp $");
109 
110 #include "opt_ipsec.h"
111 
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/malloc.h>
115 #include <sys/mbuf.h>
116 #include <sys/protosw.h>
117 #include <sys/socket.h>
118 #include <sys/time.h>
119 #include <sys/kernel.h>
120 #include <sys/syslog.h>
121 #include <sys/sysctl.h>
122 
123 #include <net/if.h>
124 #include <net/route.h>
125 
126 #include <netinet/in.h>
127 #include <netinet/in_systm.h>
128 #include <netinet/in_var.h>
129 #include <netinet/ip.h>
130 #include <netinet/ip_icmp.h>
131 #include <netinet/ip_var.h>
132 #include <netinet/in_pcb.h>
133 #include <netinet/icmp_var.h>
134 
135 #ifdef IPSEC
136 #include <netinet6/ipsec.h>
137 #include <netkey/key.h>
138 #endif
139 
140 #include <machine/stdarg.h>
141 
142 /*
143  * ICMP routines: error generation, receive packet processing, and
144  * routines to turnaround packets back to the originator, and
145  * host table maintenance routines.
146  */
147 
/*
 * Tunables.  icmpreturndatabytes bounds how much of the offending
 * datagram (beyond its IP header) icmp_error() quotes back;
 * icmpmaskrepl, when non-zero, lets icmp_input() answer address mask
 * requests.  Both can be changed through icmp_sysctl().  icmpprintfs
 * exists only in ICMPPRINTFS kernels and gates the debugging printf()s.
 */
int	icmpreturndatabytes = 8;
int	icmpmaskrepl = 0;
#ifdef ICMPPRINTFS
int	icmpprintfs = 0;
#endif
153 
154 /*
155  * List of callbacks to notify when Path MTU changes are made.
156  */
157 struct icmp_mtudisc_callback {
158 	LIST_ENTRY(icmp_mtudisc_callback) mc_list;
159 	void (*mc_func) __P((struct in_addr));
160 };
161 
162 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
163     LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
164 
165 #if 0
166 static int	ip_next_mtu __P((int, int));
167 #else
168 /*static*/ int	ip_next_mtu __P((int, int));
169 #endif
170 
171 extern int icmperrppslim;
172 static int icmperrpps_count = 0;
173 static struct timeval icmperrppslim_last;
174 static int icmp_rediraccept = 1;
175 static int icmp_redirtimeout = 0;
176 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
177 
178 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
179 static void icmp_redirect_timeout __P((struct rtentry *, struct rttimer *));
180 
181 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
182 
183 
184 void
185 icmp_init()
186 {
187 	/*
188 	 * This is only useful if the user initializes redirtimeout to
189 	 * something other than zero.
190 	 */
191 	if (icmp_redirtimeout != 0) {
192 		icmp_redirect_timeout_q =
193 			rt_timer_queue_create(icmp_redirtimeout);
194 	}
195 }
196 
197 /*
198  * Register a Path MTU Discovery callback.
199  */
200 void
201 icmp_mtudisc_callback_register(func)
202 	void (*func) __P((struct in_addr));
203 {
204 	struct icmp_mtudisc_callback *mc;
205 
206 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
207 	     mc = LIST_NEXT(mc, mc_list)) {
208 		if (mc->mc_func == func)
209 			return;
210 	}
211 
212 	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
213 	if (mc == NULL)
214 		panic("icmp_mtudisc_callback_register");
215 
216 	mc->mc_func = func;
217 	LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
218 }
219 
220 /*
221  * Generate an error packet of type error
222  * in response to bad packet ip.
223  */
224 void
225 icmp_error(n, type, code, dest, destifp)
226 	struct mbuf *n;
227 	int type, code;
228 	n_long dest;
229 	struct ifnet *destifp;
230 {
231 	struct ip *oip = mtod(n, struct ip *), *nip;
232 	unsigned oiplen = oip->ip_hl << 2;
233 	struct icmp *icp;
234 	struct mbuf *m;
235 	unsigned icmplen, mblen;
236 
237 #ifdef ICMPPRINTFS
238 	if (icmpprintfs)
239 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
240 #endif
241 	if (type != ICMP_REDIRECT)
242 		icmpstat.icps_error++;
243 	/*
244 	 * Don't send error if the original packet was encrypted.
245 	 * Don't send error if not the first fragment of message.
246 	 * Don't error if the old packet protocol was ICMP
247 	 * error message, only known informational types.
248 	 */
249 	if (n->m_flags & M_DECRYPTED)
250 		goto freeit;
251 	if (oip->ip_off &~ (IP_MF|IP_DF))
252 		goto freeit;
253 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
254 	  n->m_len >= oiplen + ICMP_MINLEN &&
255 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
256 		icmpstat.icps_oldicmp++;
257 		goto freeit;
258 	}
259 	/* Don't send error in response to a multicast or broadcast packet */
260 	if (n->m_flags & (M_BCAST|M_MCAST))
261 		goto freeit;
262 
263 	/*
264 	 * First, do a rate limitation check.
265 	 */
266 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
267 		/* XXX stat */
268 		goto freeit;
269 	}
270 
271 	/*
272 	 * Now, formulate icmp message
273 	 */
274 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
275 	/*
276 	 * Defend against mbuf chains shorter than oip->ip_len:
277 	 */
278 	mblen = 0;
279 	for (m = n; m && (mblen < icmplen); m = m->m_next)
280 		mblen += m->m_len;
281 	icmplen = min(mblen, icmplen);
282 
283 	/*
284 	 * As we are not required to return everything we have,
285 	 * we return whatever we can return at ease.
286 	 *
287 	 * Note that ICMP datagrams longer than 576 octets are out of spec
288 	 * according to RFC1812; the limit on icmpreturndatabytes below in
289 	 * icmp_sysctl will keep things below that limit.
290 	 */
291 
292 	KASSERT(ICMP_MINLEN <= MCLBYTES);
293 
294 	if (icmplen + ICMP_MINLEN > MCLBYTES)
295 		icmplen = MCLBYTES - ICMP_MINLEN;
296 
297 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
298 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
299 		MCLGET(m, M_DONTWAIT);
300 		if ((m->m_flags & M_EXT) == 0) {
301 			m_freem(m);
302 			m = NULL;
303 		}
304 	}
305 	if (m == NULL)
306 		goto freeit;
307 	m->m_len = icmplen + ICMP_MINLEN;
308 	if ((m->m_flags & M_EXT) == 0)
309 		MH_ALIGN(m, m->m_len);
310 	icp = mtod(m, struct icmp *);
311 	if ((u_int)type > ICMP_MAXTYPE)
312 		panic("icmp_error");
313 	icmpstat.icps_outhist[type]++;
314 	icp->icmp_type = type;
315 	if (type == ICMP_REDIRECT)
316 		icp->icmp_gwaddr.s_addr = dest;
317 	else {
318 		icp->icmp_void = 0;
319 		/*
320 		 * The following assignments assume an overlay with the
321 		 * zeroed icmp_void field.
322 		 */
323 		if (type == ICMP_PARAMPROB) {
324 			icp->icmp_pptr = code;
325 			code = 0;
326 		} else if (type == ICMP_UNREACH &&
327 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
328 			icp->icmp_nextmtu = htons(destifp->if_mtu);
329 	}
330 
331 	HTONS(oip->ip_off);
332 	HTONS(oip->ip_len);
333 	icp->icmp_code = code;
334 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
335 	nip = &icp->icmp_ip;
336 
337 	/*
338 	 * Now, copy old ip header (without options)
339 	 * in front of icmp message.
340 	 */
341 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
342 		panic("icmp len");
343 	m->m_data -= sizeof(struct ip);
344 	m->m_len += sizeof(struct ip);
345 	m->m_pkthdr.len = m->m_len;
346 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
347 	nip = mtod(m, struct ip *);
348 	/* ip_v set in ip_output */
349 	nip->ip_hl = sizeof(struct ip) >> 2;
350 	nip->ip_tos = 0;
351 	nip->ip_len = m->m_len;
352 	/* ip_id set in ip_output */
353 	nip->ip_off = 0;
354 	/* ip_ttl set in icmp_reflect */
355 	nip->ip_p = IPPROTO_ICMP;
356 	nip->ip_src = oip->ip_src;
357 	nip->ip_dst = oip->ip_dst;
358 	icmp_reflect(m);
359 
360 freeit:
361 	m_freem(n);
362 }
363 
364 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
365 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
366 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
367 struct sockaddr_in icmpmask = { 8, 0 };
368 
369 /*
370  * Process a received ICMP message.
371  */
372 void
373 #if __STDC__
374 icmp_input(struct mbuf *m, ...)
375 #else
376 icmp_input(m, va_alist)
377 	struct mbuf *m;
378 	va_dcl
379 #endif
380 {
381 	int proto;
382 	struct icmp *icp;
383 	struct ip *ip = mtod(m, struct ip *);
384 	int icmplen;
385 	int i;
386 	struct in_ifaddr *ia;
387 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
388 	int code;
389 	int hlen;
390 	va_list ap;
391 	struct rtentry *rt;
392 
393 	va_start(ap, m);
394 	hlen = va_arg(ap, int);
395 	proto = va_arg(ap, int);
396 	va_end(ap);
397 
398 	/*
399 	 * Locate icmp structure in mbuf, and check
400 	 * that not corrupted and of at least minimum length.
401 	 */
402 	icmplen = ip->ip_len - hlen;
403 #ifdef ICMPPRINTFS
404 	if (icmpprintfs)
405 		printf("icmp_input from %x to %x, len %d\n",
406 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
407 		    icmplen);
408 #endif
409 	if (icmplen < ICMP_MINLEN) {
410 		icmpstat.icps_tooshort++;
411 		goto freeit;
412 	}
413 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
414 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
415 		icmpstat.icps_tooshort++;
416 		return;
417 	}
418 	ip = mtod(m, struct ip *);
419 	m->m_len -= hlen;
420 	m->m_data += hlen;
421 	icp = mtod(m, struct icmp *);
422 	if (in_cksum(m, icmplen)) {
423 		icmpstat.icps_checksum++;
424 		goto freeit;
425 	}
426 	m->m_len += hlen;
427 	m->m_data -= hlen;
428 
429 #ifdef ICMPPRINTFS
430 	/*
431 	 * Message type specific processing.
432 	 */
433 	if (icmpprintfs)
434 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
435 		    icp->icmp_code);
436 #endif
437 	if (icp->icmp_type > ICMP_MAXTYPE)
438 		goto raw;
439 	icmpstat.icps_inhist[icp->icmp_type]++;
440 	code = icp->icmp_code;
441 	switch (icp->icmp_type) {
442 
443 	case ICMP_UNREACH:
444 		switch (code) {
445 			case ICMP_UNREACH_NET:
446 			case ICMP_UNREACH_HOST:
447 			case ICMP_UNREACH_PROTOCOL:
448 			case ICMP_UNREACH_PORT:
449 			case ICMP_UNREACH_SRCFAIL:
450 				code += PRC_UNREACH_NET;
451 				break;
452 
453 			case ICMP_UNREACH_NEEDFRAG:
454 				code = PRC_MSGSIZE;
455 				break;
456 
457 			case ICMP_UNREACH_NET_UNKNOWN:
458 			case ICMP_UNREACH_NET_PROHIB:
459 			case ICMP_UNREACH_TOSNET:
460 				code = PRC_UNREACH_NET;
461 				break;
462 
463 			case ICMP_UNREACH_HOST_UNKNOWN:
464 			case ICMP_UNREACH_ISOLATED:
465 			case ICMP_UNREACH_HOST_PROHIB:
466 			case ICMP_UNREACH_TOSHOST:
467 				code = PRC_UNREACH_HOST;
468 				break;
469 
470 			default:
471 				goto badcode;
472 		}
473 		goto deliver;
474 
475 	case ICMP_TIMXCEED:
476 		if (code > 1)
477 			goto badcode;
478 		code += PRC_TIMXCEED_INTRANS;
479 		goto deliver;
480 
481 	case ICMP_PARAMPROB:
482 		if (code > 1)
483 			goto badcode;
484 		code = PRC_PARAMPROB;
485 		goto deliver;
486 
487 	case ICMP_SOURCEQUENCH:
488 		if (code)
489 			goto badcode;
490 		code = PRC_QUENCH;
491 		goto deliver;
492 
493 	deliver:
494 		/*
495 		 * Problem with datagram; advise higher level routines.
496 		 */
497 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
498 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
499 			icmpstat.icps_badlen++;
500 			goto freeit;
501 		}
502 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
503 			goto badcode;
504 		NTOHS(icp->icmp_ip.ip_len);
505 #ifdef ICMPPRINTFS
506 		if (icmpprintfs)
507 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
508 #endif
509 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
510 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
511 		if (ctlfunc)
512 			(void) (*ctlfunc)(code, sintosa(&icmpsrc),
513 			    &icp->icmp_ip);
514 		break;
515 
516 	badcode:
517 		icmpstat.icps_badcode++;
518 		break;
519 
520 	case ICMP_ECHO:
521 		icp->icmp_type = ICMP_ECHOREPLY;
522 		goto reflect;
523 
524 	case ICMP_TSTAMP:
525 		if (icmplen < ICMP_TSLEN) {
526 			icmpstat.icps_badlen++;
527 			break;
528 		}
529 		icp->icmp_type = ICMP_TSTAMPREPLY;
530 		icp->icmp_rtime = iptime();
531 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
532 		goto reflect;
533 
534 	case ICMP_MASKREQ:
535 		if (icmpmaskrepl == 0)
536 			break;
537 		/*
538 		 * We are not able to respond with all ones broadcast
539 		 * unless we receive it over a point-to-point interface.
540 		 */
541 		if (icmplen < ICMP_MASKLEN) {
542 			icmpstat.icps_badlen++;
543 			break;
544 		}
545 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
546 		    in_nullhost(ip->ip_dst))
547 			icmpdst.sin_addr = ip->ip_src;
548 		else
549 			icmpdst.sin_addr = ip->ip_dst;
550 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
551 		    m->m_pkthdr.rcvif));
552 		if (ia == 0)
553 			break;
554 		icp->icmp_type = ICMP_MASKREPLY;
555 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
556 		if (in_nullhost(ip->ip_src)) {
557 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
558 				ip->ip_src = ia->ia_broadaddr.sin_addr;
559 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
560 				ip->ip_src = ia->ia_dstaddr.sin_addr;
561 		}
562 reflect:
563 		icmpstat.icps_reflect++;
564 		icmpstat.icps_outhist[icp->icmp_type]++;
565 		icmp_reflect(m);
566 		return;
567 
568 	case ICMP_REDIRECT:
569 		if (code > 3)
570 			goto badcode;
571 		if (icmp_rediraccept == 0)
572 			goto freeit;
573 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
574 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
575 			icmpstat.icps_badlen++;
576 			break;
577 		}
578 		/*
579 		 * Short circuit routing redirects to force
580 		 * immediate change in the kernel's routing
581 		 * tables.  The message is also handed to anyone
582 		 * listening on a raw socket (e.g. the routing
583 		 * daemon for use in updating its tables).
584 		 */
585 		icmpgw.sin_addr = ip->ip_src;
586 		icmpdst.sin_addr = icp->icmp_gwaddr;
587 #ifdef	ICMPPRINTFS
588 		if (icmpprintfs)
589 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
590 			    icp->icmp_gwaddr);
591 #endif
592 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
593 		rt = NULL;
594 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
595 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
596 		    sintosa(&icmpgw), (struct rtentry **)&rt);
597 		if (rt != NULL && icmp_redirtimeout != 0) {
598 			i = rt_timer_add(rt, icmp_redirect_timeout,
599 					 icmp_redirect_timeout_q);
600 			if (i)
601 				log(LOG_ERR, "ICMP:  redirect failed to "
602 				    "register timeout for route to %x, "
603 				    "code %d\n",
604 				    icp->icmp_ip.ip_dst.s_addr, i);
605 		}
606 		if (rt != NULL)
607 			rtfree(rt);
608 
609 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
610 #ifdef IPSEC
611 		key_sa_routechange((struct sockaddr *)&icmpsrc);
612 #endif
613 		break;
614 
615 	/*
616 	 * No kernel processing for the following;
617 	 * just fall through to send to raw listener.
618 	 */
619 	case ICMP_ECHOREPLY:
620 	case ICMP_ROUTERADVERT:
621 	case ICMP_ROUTERSOLICIT:
622 	case ICMP_TSTAMPREPLY:
623 	case ICMP_IREQREPLY:
624 	case ICMP_MASKREPLY:
625 	default:
626 		break;
627 	}
628 
629 raw:
630 	rip_input(m, hlen, proto);
631 	return;
632 
633 freeit:
634 	m_freem(m);
635 	return;
636 }
637 
638 /*
639  * Reflect the ip packet back to the source
640  */
641 void
642 icmp_reflect(m)
643 	struct mbuf *m;
644 {
645 	struct ip *ip = mtod(m, struct ip *);
646 	struct in_ifaddr *ia;
647 	struct ifaddr *ifa;
648 	struct sockaddr_in *sin = 0;
649 	struct in_addr t;
650 	struct mbuf *opts = 0;
651 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
652 
653 	if (!in_canforward(ip->ip_src) &&
654 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
655 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
656 		m_freem(m);	/* Bad return address */
657 		goto done;	/* ip_output() will check for broadcast */
658 	}
659 	t = ip->ip_dst;
660 	ip->ip_dst = ip->ip_src;
661 	/*
662 	 * If the incoming packet was addressed directly to us, use
663 	 * dst as the src for the reply.  Otherwise (broadcast or
664 	 * anonymous), use an address which corresponds to the
665 	 * incoming interface, with a preference for the address which
666 	 * corresponds to the route to the destination of the ICMP.
667 	 */
668 
669 	/* Look for packet addressed to us */
670 	INADDR_TO_IA(t, ia);
671 
672 	/* look for packet sent to broadcast address */
673 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
674 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
675 			if (ifa->ifa_addr->sa_family != AF_INET)
676 				continue;
677 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
678 				ia = ifatoia(ifa);
679 				break;
680 			}
681 		}
682 	}
683 
684 	if (ia)
685 		sin = &ia->ia_addr;
686 
687 	icmpdst.sin_addr = t;
688 
689 	/* if the packet is addressed somewhere else, compute the
690 	   source address for packets routed back to the source, and
691 	   use that, if it's an address on the interface which
692 	   received the packet */
693 	if (sin == (struct sockaddr_in *)0) {
694 		struct sockaddr_in sin_dst;
695 		struct route icmproute;
696 		int errornum;
697 
698 		sin_dst.sin_family = AF_INET;
699 		sin_dst.sin_len = sizeof(struct sockaddr_in);
700 		sin_dst.sin_addr = ip->ip_dst;
701 		bzero(&icmproute, sizeof(icmproute));
702 		errornum = 0;
703 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
704 		/* errornum is never used */
705 		if (icmproute.ro_rt)
706 			RTFREE(icmproute.ro_rt);
707 		/* check to make sure sin is a source address on rcvif */
708 		if (sin) {
709 			t = sin->sin_addr;
710 			sin = (struct sockaddr_in *)0;
711 			INADDR_TO_IA(t, ia);
712 			while (ia) {
713 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
714 					sin = &ia->ia_addr;
715 					break;
716 				}
717 				NEXT_IA_WITH_SAME_ADDR(ia);
718 			}
719 		}
720 	}
721 
722 	/* if it was not addressed to us, but the route doesn't go out
723 	   the source interface, pick an address on the source
724 	   interface.  This can happen when routing is asymmetric, or
725 	   when the incoming packet was encapsulated */
726 	if (sin == (struct sockaddr_in *)0) {
727 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
728 			if (ifa->ifa_addr->sa_family != AF_INET)
729 				continue;
730 			sin = &(ifatoia(ifa)->ia_addr);
731 			break;
732 		}
733 	}
734 
735 	/*
736 	 * The following happens if the packet was not addressed to us,
737 	 * and was received on an interface with no IP address:
738 	 * We find the first AF_INET address on the first non-loopback
739 	 * interface.
740 	 */
741 	if (sin == (struct sockaddr_in *)0)
742 		TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
743 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
744 				continue;
745 			sin = &ia->ia_addr;
746 			break;
747 		}
748 
749 	/*
750 	 * If we still didn't find an address, punt.  We could have an
751 	 * interface up (and receiving packets) with no address.
752 	 */
753 	if (sin == (struct sockaddr_in *)0) {
754 		m_freem(m);
755 		goto done;
756 	}
757 
758 	ip->ip_src = sin->sin_addr;
759 	ip->ip_ttl = MAXTTL;
760 
761 	if (optlen > 0) {
762 		u_char *cp;
763 		int opt, cnt;
764 		u_int len;
765 
766 		/*
767 		 * Retrieve any source routing from the incoming packet;
768 		 * add on any record-route or timestamp options.
769 		 */
770 		cp = (u_char *) (ip + 1);
771 		if ((opts = ip_srcroute()) == 0 &&
772 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
773 			opts->m_len = sizeof(struct in_addr);
774 			*mtod(opts, struct in_addr *) = zeroin_addr;
775 		}
776 		if (opts) {
777 #ifdef ICMPPRINTFS
778 		    if (icmpprintfs)
779 			    printf("icmp_reflect optlen %d rt %d => ",
780 				optlen, opts->m_len);
781 #endif
782 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
783 			    opt = cp[IPOPT_OPTVAL];
784 			    if (opt == IPOPT_EOL)
785 				    break;
786 			    if (opt == IPOPT_NOP)
787 				    len = 1;
788 			    else {
789 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
790 					    break;
791 				    len = cp[IPOPT_OLEN];
792 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
793 				        len > cnt)
794 					    break;
795 			    }
796 			    /*
797 			     * Should check for overflow, but it "can't happen"
798 			     */
799 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
800 				opt == IPOPT_SECURITY) {
801 				    bcopy((caddr_t)cp,
802 					mtod(opts, caddr_t) + opts->m_len, len);
803 				    opts->m_len += len;
804 			    }
805 		    }
806 		    /* Terminate & pad, if necessary */
807 		    if ((cnt = opts->m_len % 4) != 0) {
808 			    for (; cnt < 4; cnt++) {
809 				    *(mtod(opts, caddr_t) + opts->m_len) =
810 					IPOPT_EOL;
811 				    opts->m_len++;
812 			    }
813 		    }
814 #ifdef ICMPPRINTFS
815 		    if (icmpprintfs)
816 			    printf("%d\n", opts->m_len);
817 #endif
818 		}
819 		/*
820 		 * Now strip out original options by copying rest of first
821 		 * mbuf's data back, and adjust the IP length.
822 		 */
823 		ip->ip_len -= optlen;
824 		ip->ip_hl = sizeof(struct ip) >> 2;
825 		m->m_len -= optlen;
826 		if (m->m_flags & M_PKTHDR)
827 			m->m_pkthdr.len -= optlen;
828 		optlen += sizeof(struct ip);
829 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
830 			 (unsigned)(m->m_len - sizeof(struct ip)));
831 	}
832 	m->m_flags &= ~(M_BCAST|M_MCAST);
833 	icmp_send(m, opts);
834 done:
835 	if (opts)
836 		(void)m_free(opts);
837 }
838 
839 /*
840  * Send an icmp packet back to the ip level,
841  * after supplying a checksum.
842  */
843 void
844 icmp_send(m, opts)
845 	struct mbuf *m;
846 	struct mbuf *opts;
847 {
848 	struct ip *ip = mtod(m, struct ip *);
849 	int hlen;
850 	struct icmp *icp;
851 
852 	hlen = ip->ip_hl << 2;
853 	m->m_data += hlen;
854 	m->m_len -= hlen;
855 	icp = mtod(m, struct icmp *);
856 	icp->icmp_cksum = 0;
857 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
858 	m->m_data -= hlen;
859 	m->m_len += hlen;
860 #ifdef ICMPPRINTFS
861 	if (icmpprintfs)
862 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
863 #endif
864 #ifdef IPSEC
865 	/* Don't lookup socket */
866 	(void)ipsec_setsocket(m, NULL);
867 #endif
868 	(void) ip_output(m, opts, NULL, 0, NULL);
869 }
870 
871 n_time
872 iptime()
873 {
874 	struct timeval atv;
875 	u_long t;
876 
877 	microtime(&atv);
878 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
879 	return (htonl(t));
880 }
881 
882 int
883 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
884 	int *name;
885 	u_int namelen;
886 	void *oldp;
887 	size_t *oldlenp;
888 	void *newp;
889 	size_t newlen;
890 {
891 	int arg, error;
892 
893 	/* All sysctl names at this level are terminal. */
894 	if (namelen != 1)
895 		return (ENOTDIR);
896 
897 	switch (name[0])
898 	{
899 	case ICMPCTL_MASKREPL:
900 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
901 		break;
902 	case ICMPCTL_RETURNDATABYTES:
903 		arg = icmpreturndatabytes;
904 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
905 		if (error)
906 			break;
907 		if ((arg >= 8) || (arg <= 512))
908 			icmpreturndatabytes = arg;
909 		else
910 			error = EINVAL;
911 		break;
912 	case ICMPCTL_ERRPPSLIMIT:
913 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
914 		break;
915 	case ICMPCTL_REDIRACCEPT:
916 		error = sysctl_int(oldp, oldlenp, newp, newlen,
917 				   &icmp_rediraccept);
918 		break;
919 	case ICMPCTL_REDIRTIMEOUT:
920 		error = sysctl_int(oldp, oldlenp, newp, newlen,
921 				   &icmp_redirtimeout);
922 		if (icmp_redirect_timeout_q != NULL) {
923 			if (icmp_redirtimeout == 0) {
924 				rt_timer_queue_destroy(icmp_redirect_timeout_q,
925 						       TRUE);
926 				icmp_redirect_timeout_q = NULL;
927 			} else {
928 				rt_timer_queue_change(icmp_redirect_timeout_q,
929 						      icmp_redirtimeout);
930 			}
931 		} else if (icmp_redirtimeout > 0) {
932 			icmp_redirect_timeout_q =
933 				rt_timer_queue_create(icmp_redirtimeout);
934 		}
935 		return (error);
936 
937 		break;
938 	default:
939 		error = ENOPROTOOPT;
940 		break;
941 	}
942 	return error;
943 }
944 
945 /* Table of common MTUs: */
946 
947 static const u_int mtu_table[] = {
948 	65535, 65280, 32000, 17914, 9180, 8166,
949 	4352, 2002, 1492, 1006, 508, 296, 68, 0
950 };
951 
952 void
953 icmp_mtudisc(icp, faddr)
954 	struct icmp *icp;
955 	struct in_addr faddr;
956 {
957 	struct icmp_mtudisc_callback *mc;
958 	struct sockaddr *dst = sintosa(&icmpsrc);
959 	struct rtentry *rt;
960 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
961 	int    error;
962 
963 	rt = rtalloc1(dst, 1);
964 	if (rt == 0)
965 		return;
966 
967 	/* If we didn't get a host route, allocate one */
968 
969 	if ((rt->rt_flags & RTF_HOST) == 0) {
970 		struct rtentry *nrt;
971 
972 		error = rtrequest((int) RTM_ADD, dst,
973 		    (struct sockaddr *) rt->rt_gateway,
974 		    (struct sockaddr *) 0,
975 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
976 		if (error) {
977 			rtfree(rt);
978 			return;
979 		}
980 		nrt->rt_rmx = rt->rt_rmx;
981 		rtfree(rt);
982 		rt = nrt;
983 	}
984 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
985 	if (error) {
986 		rtfree(rt);
987 		return;
988 	}
989 
990 	if (mtu == 0) {
991 		int i = 0;
992 
993 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
994 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
995 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
996 			mtu -= (icp->icmp_ip.ip_hl << 2);
997 
998 		/* If we still can't guess a value, try the route */
999 
1000 		if (mtu == 0) {
1001 			mtu = rt->rt_rmx.rmx_mtu;
1002 
1003 			/* If no route mtu, default to the interface mtu */
1004 
1005 			if (mtu == 0)
1006 				mtu = rt->rt_ifp->if_mtu;
1007 		}
1008 
1009 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
1010 			if (mtu > mtu_table[i]) {
1011 				mtu = mtu_table[i];
1012 				break;
1013 			}
1014 	}
1015 
1016 	/*
1017 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
1018 	 *	  to turn off PMTU for a route, and the kernel can
1019 	 *	  set it to indicate a serious problem with PMTU
1020 	 *	  on a route.  We should be using a separate flag
1021 	 *	  for the kernel to indicate this.
1022 	 */
1023 
1024 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1025 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
1026 			rt->rt_rmx.rmx_locks |= RTV_MTU;
1027 		else if (rt->rt_rmx.rmx_mtu > mtu ||
1028 			 rt->rt_rmx.rmx_mtu == 0) {
1029 			icmpstat.icps_pmtuchg++;
1030 			rt->rt_rmx.rmx_mtu = mtu;
1031 		}
1032 	}
1033 
1034 	if (rt)
1035 		rtfree(rt);
1036 
1037 	/*
1038 	 * Notify protocols that the MTU for this destination
1039 	 * has changed.
1040 	 */
1041 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
1042 	     mc = LIST_NEXT(mc, mc_list))
1043 		(*mc->mc_func)(faddr);
1044 }
1045 
1046 /*
1047  * Return the next larger or smaller MTU plateau (table from RFC 1191)
1048  * given current value MTU.  If DIR is less than zero, a larger plateau
1049  * is returned; otherwise, a smaller value is returned.
1050  */
1051 int
1052 ip_next_mtu(mtu, dir)	/* XXX */
1053 	int mtu;
1054 	int dir;
1055 {
1056 	int i;
1057 
1058 	for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
1059 		if (mtu >= mtu_table[i])
1060 			break;
1061 	}
1062 
1063 	if (dir < 0) {
1064 		if (i == 0) {
1065 			return 0;
1066 		} else {
1067 			return mtu_table[i - 1];
1068 		}
1069 	} else {
1070 		if (mtu_table[i] == 0) {
1071 			return 0;
1072 		} else if (mtu > mtu_table[i]) {
1073 			return mtu_table[i];
1074 		} else {
1075 			return mtu_table[i + 1];
1076 		}
1077 	}
1078 }
1079 
1080 static void
1081 icmp_mtudisc_timeout(rt, r)
1082 	struct rtentry *rt;
1083 	struct rttimer *r;
1084 {
1085 	if (rt == NULL)
1086 		panic("icmp_mtudisc_timeout:  bad route to timeout");
1087 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1088 	    (RTF_DYNAMIC | RTF_HOST)) {
1089 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1090 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1091 	} else {
1092 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1093 			rt->rt_rmx.rmx_mtu = 0;
1094 		}
1095 	}
1096 }
1097 
1098 static void
1099 icmp_redirect_timeout(rt, r)
1100 	struct rtentry *rt;
1101 	struct rttimer *r;
1102 {
1103 	if (rt == NULL)
1104 		panic("icmp_redirect_timeout:  bad route to timeout");
1105 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1106 	    (RTF_DYNAMIC | RTF_HOST)) {
1107 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1108 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1109 	}
1110 }
1111 
1112 /*
1113  * Perform rate limit check.
1114  * Returns 0 if it is okay to send the icmp packet.
1115  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1116  * limitation.
1117  *
1118  * XXX per-destination/type check necessary?
1119  */
1120 static int
1121 icmp_ratelimit(dst, type, code)
1122 	const struct in_addr *dst;
1123 	const int type;			/* not used at this moment */
1124 	const int code;			/* not used at this moment */
1125 {
1126 
1127 	/* PPS limit */
1128 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1129 	    icmperrppslim)) {
1130 		/* The packet is subject to rate limit */
1131 		return 1;
1132 	}
1133 
1134 	/*okay to send*/
1135 	return 0;
1136 }
1137