xref: /openbsd/sys/netinet/ip_icmp.c (revision 404b540a)
1 /*	$OpenBSD: ip_icmp.c,v 1.84 2009/06/09 11:52:54 sthen Exp $	*/
2 /*	$NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  *	This product includes software developed at the Information
48  *	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include "carp.h"
72 #include "pf.h"
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/sysctl.h>
80 
81 #include <net/if.h>
82 #include <net/route.h>
83 
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/in_var.h>
87 #include <netinet/ip.h>
88 #include <netinet/ip_icmp.h>
89 #include <netinet/ip_var.h>
90 #include <netinet/icmp_var.h>
91 
92 #if NCARP > 0
93 #include <net/if_types.h>
94 #include <netinet/ip_carp.h>
95 #endif
96 
97 #if NPF > 0
98 #include <net/pfvar.h>
99 #endif
100 
101 /*
102  * ICMP routines: error generation, receive packet processing, and
103  * routines to turnaround packets back to the originator, and
104  * host table maintenance routines.
105  */
106 
107 int	icmpmaskrepl = 0;
108 int	icmpbmcastecho = 0;
109 int	icmptstamprepl = 1;
110 #ifdef ICMPPRINTFS
111 int	icmpprintfs = 0;
112 #endif
113 int	icmperrppslim = 100;
114 int	icmperrpps_count = 0;
115 struct timeval icmperrppslim_last;
116 int	icmp_rediraccept = 0;
117 int	icmp_redirtimeout = 10 * 60;
118 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
119 struct	icmpstat icmpstat;
120 
121 int *icmpctl_vars[ICMPCTL_MAXID] = ICMPCTL_VARS;
122 
123 void icmp_mtudisc_timeout(struct rtentry *, struct rttimer *);
124 int icmp_ratelimit(const struct in_addr *, const int, const int);
125 void icmp_redirect_timeout(struct rtentry *, struct rttimer *);
126 
127 extern	struct protosw inetsw[];
128 
129 void
130 icmp_init(void)
131 {
132 	/*
133 	 * This is only useful if the user initializes redirtimeout to
134 	 * something other than zero.
135 	 */
136 	if (icmp_redirtimeout != 0) {
137 		icmp_redirect_timeout_q =
138 		    rt_timer_queue_create(icmp_redirtimeout);
139 	}
140 }
141 
142 struct mbuf *
143 icmp_do_error(struct mbuf *n, int type, int code, n_long dest, int destmtu)
144 {
145 	struct ip *oip = mtod(n, struct ip *), *nip;
146 	unsigned oiplen = oip->ip_hl << 2;
147 	struct icmp *icp;
148 	struct mbuf *m;
149 	unsigned icmplen, mblen;
150 
151 #ifdef ICMPPRINTFS
152 	if (icmpprintfs)
153 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
154 #endif
155 	if (type != ICMP_REDIRECT)
156 		icmpstat.icps_error++;
157 	/*
158 	 * Don't send error if not the first fragment of message.
159 	 * Don't error if the old packet protocol was ICMP
160 	 * error message, only known informational types.
161 	 */
162 	if (oip->ip_off & htons(IP_OFFMASK))
163 		goto freeit;
164 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
165 	    n->m_len >= oiplen + ICMP_MINLEN &&
166 	    !ICMP_INFOTYPE(((struct icmp *)
167 	    ((caddr_t)oip + oiplen))->icmp_type)) {
168 		icmpstat.icps_oldicmp++;
169 		goto freeit;
170 	}
171 	/* Don't send error in response to a multicast or broadcast packet */
172 	if (n->m_flags & (M_BCAST|M_MCAST))
173 		goto freeit;
174 
175 	/*
176 	 * First, do a rate limitation check.
177 	 */
178 	if (icmp_ratelimit(&oip->ip_src, type, code))
179 		goto freeit;	/* XXX stat */
180 
181 	/*
182 	 * Now, formulate icmp message
183 	 */
184 	icmplen = oiplen + min(8, ntohs(oip->ip_len));
185 	/*
186 	 * Defend against mbuf chains shorter than oip->ip_len:
187 	 */
188 	mblen = 0;
189 	for (m = n; m && (mblen < icmplen); m = m->m_next)
190 		mblen += m->m_len;
191 	icmplen = min(mblen, icmplen);
192 
193 	/*
194 	 * As we are not required to return everything we have,
195 	 * we return whatever we can return at ease.
196 	 *
197 	 * Note that ICMP datagrams longer than 576 octets are out of spec
198 	 * according to RFC1812;
199 	 */
200 
201 	KASSERT(ICMP_MINLEN <= MCLBYTES);
202 
203 	if (icmplen + ICMP_MINLEN > MCLBYTES)
204 		icmplen = MCLBYTES - ICMP_MINLEN - sizeof (struct ip);
205 
206 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
207 	if (m && (sizeof (struct ip) + icmplen + ICMP_MINLEN > MHLEN)) {
208 		MCLGET(m, M_DONTWAIT);
209 		if ((m->m_flags & M_EXT) == 0) {
210 			m_freem(m);
211 			m = NULL;
212 		}
213 	}
214 	if (m == NULL)
215 		goto freeit;
216 	/* keep in same domain and rtable (the latter is a bit unclear) */
217 	m->m_pkthdr.rdomain = n->m_pkthdr.rdomain;
218 	m->m_len = icmplen + ICMP_MINLEN;
219 	if ((m->m_flags & M_EXT) == 0)
220 		MH_ALIGN(m, m->m_len);
221 	icp = mtod(m, struct icmp *);
222 	if ((u_int)type > ICMP_MAXTYPE)
223 		panic("icmp_error");
224 	icmpstat.icps_outhist[type]++;
225 	icp->icmp_type = type;
226 	if (type == ICMP_REDIRECT)
227 		icp->icmp_gwaddr.s_addr = dest;
228 	else {
229 		icp->icmp_void = 0;
230 		/*
231 		 * The following assignments assume an overlay with the
232 		 * zeroed icmp_void field.
233 		 */
234 		if (type == ICMP_PARAMPROB) {
235 			icp->icmp_pptr = code;
236 			code = 0;
237 		} else if (type == ICMP_UNREACH &&
238 		    code == ICMP_UNREACH_NEEDFRAG && destmtu)
239 			icp->icmp_nextmtu = htons(destmtu);
240 	}
241 
242 	icp->icmp_code = code;
243 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
244 
245 	/*
246 	 * Now, copy old ip header (without options)
247 	 * in front of icmp message.
248 	 */
249 	if ((m->m_flags & M_EXT) == 0 &&
250 	    m->m_data - sizeof(struct ip) < m->m_pktdat)
251 		panic("icmp len");
252 	m->m_data -= sizeof(struct ip);
253 	m->m_len += sizeof(struct ip);
254 	m->m_pkthdr.len = m->m_len;
255 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
256 	nip = mtod(m, struct ip *);
257 	/* ip_v set in ip_output */
258 	nip->ip_hl = sizeof(struct ip) >> 2;
259 	nip->ip_tos = 0;
260 	nip->ip_len = htons(m->m_len);
261 	/* ip_id set in ip_output */
262 	nip->ip_off = 0;
263 	/* ip_ttl set in icmp_reflect */
264 	nip->ip_p = IPPROTO_ICMP;
265 	nip->ip_src = oip->ip_src;
266 	nip->ip_dst = oip->ip_dst;
267 
268 	/* move PF_GENERATED to new packet, if existent XXX preserve more? */
269 	if (n->m_pkthdr.pf.flags & PF_TAG_GENERATED)
270 		m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
271 
272 	m_freem(n);
273 	return (m);
274 
275 freeit:
276 	m_freem(n);
277 	return (NULL);
278 }
279 
280 /*
281  * Generate an error packet of type error
282  * in response to bad packet ip.
283  *
284  * The ip packet inside has ip_off and ip_len in host byte order.
285  */
286 void
287 icmp_error(struct mbuf *n, int type, int code, n_long dest, int destmtu)
288 {
289 	struct mbuf *m;
290 
291 	m = icmp_do_error(n, type, code, dest, destmtu);
292 	if (m != NULL)
293 		icmp_reflect(m);
294 }
295 
296 struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
297 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
298 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
299 
300 /*
301  * Process a received ICMP message.
302  */
303 void
304 icmp_input(struct mbuf *m, ...)
305 {
306 	struct icmp *icp;
307 	struct ip *ip = mtod(m, struct ip *);
308 	int icmplen;
309 	int i;
310 	struct in_ifaddr *ia;
311 	void *(*ctlfunc)(int, struct sockaddr *, void *);
312 	int code;
313 	extern u_char ip_protox[];
314 	int hlen;
315 	va_list ap;
316 	struct rtentry *rt;
317 
318 	va_start(ap, m);
319 	hlen = va_arg(ap, int);
320 	va_end(ap);
321 
322 	/*
323 	 * Locate icmp structure in mbuf, and check
324 	 * that not corrupted and of at least minimum length.
325 	 */
326 	icmplen = ntohs(ip->ip_len) - hlen;
327 #ifdef ICMPPRINTFS
328 	if (icmpprintfs) {
329 		char buf[4 * sizeof("123")];
330 
331 		strlcpy(buf, inet_ntoa(ip->ip_dst), sizeof buf);
332 		printf("icmp_input from %s to %s, len %d\n",
333 		    inet_ntoa(ip->ip_src), buf, icmplen);
334 	}
335 #endif
336 	if (icmplen < ICMP_MINLEN) {
337 		icmpstat.icps_tooshort++;
338 		goto freeit;
339 	}
340 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
341 	if (m->m_len < i && (m = m_pullup(m, i)) == NULL) {
342 		icmpstat.icps_tooshort++;
343 		return;
344 	}
345 	ip = mtod(m, struct ip *);
346 	m->m_len -= hlen;
347 	m->m_data += hlen;
348 	icp = mtod(m, struct icmp *);
349 	if (in_cksum(m, icmplen)) {
350 		icmpstat.icps_checksum++;
351 		goto freeit;
352 	}
353 	m->m_len += hlen;
354 	m->m_data -= hlen;
355 
356 #ifdef ICMPPRINTFS
357 	/*
358 	 * Message type specific processing.
359 	 */
360 	if (icmpprintfs)
361 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
362 		    icp->icmp_code);
363 #endif
364 	if (icp->icmp_type > ICMP_MAXTYPE)
365 		goto raw;
366 	icmpstat.icps_inhist[icp->icmp_type]++;
367 	code = icp->icmp_code;
368 	switch (icp->icmp_type) {
369 
370 	case ICMP_UNREACH:
371 		switch (code) {
372 		case ICMP_UNREACH_NET:
373 		case ICMP_UNREACH_HOST:
374 		case ICMP_UNREACH_PROTOCOL:
375 		case ICMP_UNREACH_PORT:
376 		case ICMP_UNREACH_SRCFAIL:
377 			code += PRC_UNREACH_NET;
378 			break;
379 
380 		case ICMP_UNREACH_NEEDFRAG:
381 			code = PRC_MSGSIZE;
382 			break;
383 
384 		case ICMP_UNREACH_NET_UNKNOWN:
385 		case ICMP_UNREACH_NET_PROHIB:
386 		case ICMP_UNREACH_TOSNET:
387 			code = PRC_UNREACH_NET;
388 			break;
389 
390 		case ICMP_UNREACH_HOST_UNKNOWN:
391 		case ICMP_UNREACH_ISOLATED:
392 		case ICMP_UNREACH_HOST_PROHIB:
393 		case ICMP_UNREACH_TOSHOST:
394 		case ICMP_UNREACH_FILTER_PROHIB:
395 		case ICMP_UNREACH_HOST_PRECEDENCE:
396 		case ICMP_UNREACH_PRECEDENCE_CUTOFF:
397 			code = PRC_UNREACH_HOST;
398 			break;
399 
400 		default:
401 			goto badcode;
402 		}
403 		goto deliver;
404 
405 	case ICMP_TIMXCEED:
406 		if (code > 1)
407 			goto badcode;
408 		code += PRC_TIMXCEED_INTRANS;
409 		goto deliver;
410 
411 	case ICMP_PARAMPROB:
412 		if (code > 1)
413 			goto badcode;
414 		code = PRC_PARAMPROB;
415 		goto deliver;
416 
417 	case ICMP_SOURCEQUENCH:
418 		if (code)
419 			goto badcode;
420 		code = PRC_QUENCH;
421 	deliver:
422 		/* Free packet atttributes */
423 		if (m->m_flags & M_PKTHDR)
424 			m_tag_delete_chain(m);
425 
426 		/*
427 		 * Problem with datagram; advise higher level routines.
428 		 */
429 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
430 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
431 			icmpstat.icps_badlen++;
432 			goto freeit;
433 		}
434 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
435 			goto badcode;
436 #ifdef INET6
437 		/* Get more contiguous data for a v6 in v4 ICMP message. */
438 		if (icp->icmp_ip.ip_p == IPPROTO_IPV6) {
439 			if (icmplen < ICMP_V6ADVLENMIN ||
440 			    icmplen < ICMP_V6ADVLEN(icp)) {
441 				icmpstat.icps_badlen++;
442 				goto freeit;
443 			} else {
444 				if ((m = m_pullup(m, (ip->ip_hl << 2) +
445 				    ICMP_V6ADVLEN(icp))) == NULL) {
446 					icmpstat.icps_tooshort++;
447 					return;
448 				}
449 				ip = mtod(m, struct ip *);
450 				icp = (struct icmp *)
451 				    (m->m_data + (ip->ip_hl << 2));
452 			}
453 		}
454 #endif /* INET6 */
455 #ifdef ICMPPRINTFS
456 		if (icmpprintfs)
457 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
458 #endif
459 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
460 #if NCARP > 0
461 		if (m->m_pkthdr.rcvif->if_type == IFT_CARP &&
462 		    carp_lsdrop(m, AF_INET, &icmpsrc.sin_addr.s_addr,
463 		    &ip->ip_dst.s_addr))
464 			goto freeit;
465 #endif
466 		/*
467 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
468 		 * notification to TCP layer.
469 		 */
470 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
471 		if (ctlfunc)
472 			(*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip);
473 		break;
474 
475 	badcode:
476 		icmpstat.icps_badcode++;
477 		break;
478 
479 	case ICMP_ECHO:
480 		if (!icmpbmcastecho &&
481 		    (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
482 			icmpstat.icps_bmcastecho++;
483 			break;
484 		}
485 		icp->icmp_type = ICMP_ECHOREPLY;
486 		goto reflect;
487 
488 	case ICMP_TSTAMP:
489 		if (icmptstamprepl == 0)
490 			break;
491 
492 		if (!icmpbmcastecho &&
493 		    (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
494 			icmpstat.icps_bmcastecho++;
495 			break;
496 		}
497 		if (icmplen < ICMP_TSLEN) {
498 			icmpstat.icps_badlen++;
499 			break;
500 		}
501 		icp->icmp_type = ICMP_TSTAMPREPLY;
502 		icp->icmp_rtime = iptime();
503 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
504 		goto reflect;
505 
506 	case ICMP_MASKREQ:
507 		if (icmpmaskrepl == 0)
508 			break;
509 		/*
510 		 * We are not able to respond with all ones broadcast
511 		 * unless we receive it over a point-to-point interface.
512 		 */
513 		if (icmplen < ICMP_MASKLEN) {
514 			icmpstat.icps_badlen++;
515 			break;
516 		}
517 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
518 		    ip->ip_dst.s_addr == INADDR_ANY)
519 			icmpdst.sin_addr = ip->ip_src;
520 		else
521 			icmpdst.sin_addr = ip->ip_dst;
522 		if (m->m_pkthdr.rcvif == NULL)
523 			break;
524 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
525 		    m->m_pkthdr.rcvif));
526 		if (ia == 0)
527 			break;
528 		icp->icmp_type = ICMP_MASKREPLY;
529 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
530 		if (ip->ip_src.s_addr == 0) {
531 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
532 				ip->ip_src = ia->ia_broadaddr.sin_addr;
533 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
534 				ip->ip_src = ia->ia_dstaddr.sin_addr;
535 		}
536 reflect:
537 #if NCARP > 0
538 		if (m->m_pkthdr.rcvif->if_type == IFT_CARP &&
539 		    carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr,
540 		    &ip->ip_dst.s_addr))
541 			goto freeit;
542 #endif
543 		/* Free packet atttributes */
544 		if (m->m_flags & M_PKTHDR)
545 			m_tag_delete_chain(m);
546 
547 		icmpstat.icps_reflect++;
548 		icmpstat.icps_outhist[icp->icmp_type]++;
549 		icmp_reflect(m);
550 		return;
551 
552 	case ICMP_REDIRECT:
553 		/* Free packet atttributes */
554 		if (m->m_flags & M_PKTHDR)
555 			m_tag_delete_chain(m);
556 		if (icmp_rediraccept == 0)
557 			goto freeit;
558 		if (code > 3)
559 			goto badcode;
560 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
561 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
562 			icmpstat.icps_badlen++;
563 			break;
564 		}
565 		/*
566 		 * Short circuit routing redirects to force
567 		 * immediate change in the kernel's routing
568 		 * tables.  The message is also handed to anyone
569 		 * listening on a raw socket (e.g. the routing
570 		 * daemon for use in updating its tables).
571 		 */
572 		icmpgw.sin_addr = ip->ip_src;
573 		icmpdst.sin_addr = icp->icmp_gwaddr;
574 #ifdef	ICMPPRINTFS
575 		if (icmpprintfs) {
576 			char buf[4 * sizeof("123")];
577 			strlcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst),
578 			    sizeof buf);
579 
580 			printf("redirect dst %s to %s\n",
581 			    buf, inet_ntoa(icp->icmp_gwaddr));
582 		}
583 #endif
584 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
585 #if NCARP > 0
586 		if (m->m_pkthdr.rcvif->if_type == IFT_CARP &&
587 		    carp_lsdrop(m, AF_INET, &icmpsrc.sin_addr.s_addr,
588 		    &ip->ip_dst.s_addr))
589 			goto freeit;
590 #endif
591 		rt = NULL;
592 		/* XXX rdomain vs. rtable */
593 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
594 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
595 		    sintosa(&icmpgw), (struct rtentry **)&rt,
596 		    m->m_pkthdr.rdomain);
597 		if (rt != NULL && icmp_redirtimeout != 0) {
598 			(void)rt_timer_add(rt, icmp_redirect_timeout,
599 			    icmp_redirect_timeout_q);
600 		}
601 		if (rt != NULL)
602 			rtfree(rt);
603 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
604 		break;
605 
606 	/*
607 	 * No kernel processing for the following;
608 	 * just fall through to send to raw listener.
609 	 */
610 	case ICMP_ECHOREPLY:
611 	case ICMP_ROUTERADVERT:
612 	case ICMP_ROUTERSOLICIT:
613 	case ICMP_TSTAMPREPLY:
614 	case ICMP_IREQREPLY:
615 	case ICMP_MASKREPLY:
616 	case ICMP_TRACEROUTE:
617 	case ICMP_DATACONVERR:
618 	case ICMP_MOBILE_REDIRECT:
619 	case ICMP_IPV6_WHEREAREYOU:
620 	case ICMP_IPV6_IAMHERE:
621 	case ICMP_MOBILE_REGREQUEST:
622 	case ICMP_MOBILE_REGREPLY:
623 	case ICMP_PHOTURIS:
624 	default:
625 		break;
626 	}
627 
628 raw:
629 	rip_input(m);
630 	return;
631 
632 freeit:
633 	m_freem(m);
634 }
635 
636 /*
637  * Reflect the ip packet back to the source
638  */
639 void
640 icmp_reflect(struct mbuf *m)
641 {
642 	struct ip *ip = mtod(m, struct ip *);
643 	struct in_ifaddr *ia;
644 	struct in_addr t;
645 	struct mbuf *opts = 0;
646 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
647 
648 	if (!in_canforward(ip->ip_src) &&
649 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
650 	    htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
651 		m_freem(m);	/* Bad return address */
652 		goto done;	/* ip_output() will check for broadcast */
653 	}
654 
655 #if NPF > 0
656 	pf_pkt_addr_changed(m);
657 #endif
658 	t = ip->ip_dst;
659 	ip->ip_dst = ip->ip_src;
660 	/*
661 	 * If the incoming packet was addressed directly to us,
662 	 * use dst as the src for the reply.  For broadcast, use
663 	 * the address which corresponds to the incoming interface.
664 	 */
665 	TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
666 		if (ia->ia_ifp->if_rdomain != m->m_pkthdr.rdomain)
667 			continue;
668 		if (t.s_addr == ia->ia_addr.sin_addr.s_addr)
669 			break;
670 		if ((ia->ia_ifp->if_flags & IFF_BROADCAST) &&
671 		    t.s_addr == ia->ia_broadaddr.sin_addr.s_addr)
672 			break;
673 	}
674 	/*
675 	 * The following happens if the packet was not addressed to us.
676 	 * Use the new source address and do a route lookup. If it fails
677 	 * drop the packet as there is no path to the host.
678 	 */
679 	if (ia == (struct in_ifaddr *)0) {
680 		struct sockaddr_in *dst;
681 		struct route ro;
682 
683 		bzero((caddr_t) &ro, sizeof(ro));
684 		dst = satosin(&ro.ro_dst);
685 		dst->sin_family = AF_INET;
686 		dst->sin_len = sizeof(*dst);
687 		dst->sin_addr = ip->ip_src;
688 
689 		/* keep packet in the original VRF instance */
690 		ro.ro_rt = rtalloc1(&ro.ro_dst, 1,
691 		     m->m_pkthdr.rdomain);
692 		if (ro.ro_rt == 0) {
693 			ipstat.ips_noroute++;
694 			m_freem(m);
695 			goto done;
696 		}
697 
698 		ia = ifatoia(ro.ro_rt->rt_ifa);
699 		ro.ro_rt->rt_use++;
700 		RTFREE(ro.ro_rt);
701 	}
702 
703 	t = ia->ia_addr.sin_addr;
704 	ip->ip_src = t;
705 	ip->ip_ttl = MAXTTL;
706 
707 	if (optlen > 0) {
708 		u_char *cp;
709 		int opt, cnt;
710 		u_int len;
711 
712 		/*
713 		 * Retrieve any source routing from the incoming packet;
714 		 * add on any record-route or timestamp options.
715 		 */
716 		cp = (u_char *) (ip + 1);
717 		if ((opts = ip_srcroute()) == 0 &&
718 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
719 			opts->m_len = sizeof(struct in_addr);
720 			mtod(opts, struct in_addr *)->s_addr = 0;
721 		}
722 		if (opts) {
723 #ifdef ICMPPRINTFS
724 			if (icmpprintfs)
725 				printf("icmp_reflect optlen %d rt %d => ",
726 				    optlen, opts->m_len);
727 #endif
728 			for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
729 				opt = cp[IPOPT_OPTVAL];
730 				if (opt == IPOPT_EOL)
731 					break;
732 				if (opt == IPOPT_NOP)
733 					len = 1;
734 				else {
735 					if (cnt < IPOPT_OLEN + sizeof(*cp))
736 						break;
737 					len = cp[IPOPT_OLEN];
738 					if (len < IPOPT_OLEN + sizeof(*cp) ||
739 					    len > cnt)
740 						break;
741 				}
742 				/*
743 				 * Should check for overflow, but it
744 				 * "can't happen"
745 				 */
746 				if (opt == IPOPT_RR || opt == IPOPT_TS ||
747 				    opt == IPOPT_SECURITY) {
748 					bcopy((caddr_t)cp,
749 					    mtod(opts, caddr_t) + opts->m_len,
750 					    len);
751 					opts->m_len += len;
752 				}
753 			}
754 			/* Terminate & pad, if necessary */
755 			if ((cnt = opts->m_len % 4) != 0)
756 				for (; cnt < 4; cnt++) {
757 					*(mtod(opts, caddr_t) + opts->m_len) =
758 					    IPOPT_EOL;
759 					opts->m_len++;
760 				}
761 #ifdef ICMPPRINTFS
762 			if (icmpprintfs)
763 				printf("%d\n", opts->m_len);
764 #endif
765 		}
766 		/*
767 		 * Now strip out original options by copying rest of first
768 		 * mbuf's data back, and adjust the IP length.
769 		 */
770 		ip->ip_len = htons(ntohs(ip->ip_len) - optlen);
771 		ip->ip_hl = sizeof(struct ip) >> 2;
772 		m->m_len -= optlen;
773 		if (m->m_flags & M_PKTHDR)
774 			m->m_pkthdr.len -= optlen;
775 		optlen += sizeof(struct ip);
776 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
777 		    (unsigned)(m->m_len - sizeof(struct ip)));
778 	}
779 	m->m_flags &= ~(M_BCAST|M_MCAST);
780 	icmp_send(m, opts);
781 done:
782 	if (opts)
783 		(void)m_free(opts);
784 }
785 
786 /*
787  * Send an icmp packet back to the ip level,
788  * after supplying a checksum.
789  */
790 void
791 icmp_send(struct mbuf *m, struct mbuf *opts)
792 {
793 	struct ip *ip = mtod(m, struct ip *);
794 	int hlen;
795 	struct icmp *icp;
796 
797 	hlen = ip->ip_hl << 2;
798 	m->m_data += hlen;
799 	m->m_len -= hlen;
800 	icp = mtod(m, struct icmp *);
801 	icp->icmp_cksum = 0;
802 	icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
803 	m->m_data -= hlen;
804 	m->m_len += hlen;
805 #ifdef ICMPPRINTFS
806 	if (icmpprintfs) {
807 		char buf[4 * sizeof("123")];
808 
809 		strlcpy(buf, inet_ntoa(ip->ip_dst), sizeof buf);
810 		printf("icmp_send dst %s src %s\n",
811 		    buf, inet_ntoa(ip->ip_src));
812 	}
813 #endif
814 	(void)ip_output(m, opts, (void *)NULL, 0, (void *)NULL, (void *)NULL);
815 }
816 
817 n_time
818 iptime(void)
819 {
820 	struct timeval atv;
821 	u_long t;
822 
823 	microtime(&atv);
824 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
825 	return (htonl(t));
826 }
827 
828 int
829 icmp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
830     size_t newlen)
831 {
832 
833 	/* All sysctl names at this level are terminal. */
834 	if (namelen != 1)
835 		return (ENOTDIR);
836 
837 	switch (name[0]) {
838 	case ICMPCTL_REDIRTIMEOUT: {
839 		int error;
840 
841 		error = sysctl_int(oldp, oldlenp, newp, newlen,
842 		    &icmp_redirtimeout);
843 		if (icmp_redirect_timeout_q != NULL) {
844 			if (icmp_redirtimeout == 0) {
845 				rt_timer_queue_destroy(icmp_redirect_timeout_q,
846 				    TRUE);
847 				icmp_redirect_timeout_q = NULL;
848 			} else
849 				rt_timer_queue_change(icmp_redirect_timeout_q,
850 				    icmp_redirtimeout);
851 		} else if (icmp_redirtimeout > 0) {
852 			icmp_redirect_timeout_q =
853 			    rt_timer_queue_create(icmp_redirtimeout);
854 		}
855 		return (error);
856 
857 		break;
858 	}
859 	case ICMPCTL_STATS:
860 		if (newp != NULL)
861 			return (EPERM);
862 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
863 		    &icmpstat, sizeof(icmpstat)));
864 	default:
865 		if (name[0] < ICMPCTL_MAXID)
866 			return (sysctl_int_arr(icmpctl_vars, name, namelen,
867 			    oldp, oldlenp, newp, newlen));
868 		return (ENOPROTOOPT);
869 	}
870 	/* NOTREACHED */
871 }
872 
873 
874 struct rtentry *
875 icmp_mtudisc_clone(struct sockaddr *dst, u_int rtableid)
876 {
877 	struct rtentry *rt;
878 	int error;
879 
880 	rt = rtalloc1(dst, 1, rtableid);
881 	if (rt == 0)
882 		return (NULL);
883 
884 	/* If we didn't get a host route, allocate one */
885 
886 	if ((rt->rt_flags & RTF_HOST) == 0) {
887 		struct rtentry *nrt;
888 		struct rt_addrinfo info;
889 
890 		bzero(&info, sizeof(info));
891 		info.rti_info[RTAX_DST] = dst;
892 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
893 		info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC;
894 
895 		error = rtrequest1(RTM_ADD, &info, RTP_DEFAULT, &nrt, rtableid);
896 		if (error) {
897 			rtfree(rt);
898 			return (NULL);
899 		}
900 		nrt->rt_rmx = rt->rt_rmx;
901 		rtfree(rt);
902 		rt = nrt;
903 	}
904 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
905 	if (error) {
906 		rtfree(rt);
907 		return (NULL);
908 	}
909 
910 	return (rt);
911 }
912 
913 void
914 icmp_mtudisc(struct icmp *icp, u_int rtableid)
915 {
916 	struct rtentry *rt;
917 	struct sockaddr *dst = sintosa(&icmpsrc);
918 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
919 
920 	/* Table of common MTUs: */
921 
922 	static u_short mtu_table[] = {
923 		65535, 65280, 32000, 17914, 9180, 8166,
924 		4352, 2002, 1492, 1006, 508, 296, 68, 0
925 	};
926 
927 	rt = icmp_mtudisc_clone(dst, rtableid);
928 	if (rt == 0)
929 		return;
930 
931 	if (mtu == 0) {
932 		int i = 0;
933 
934 		mtu = ntohs(icp->icmp_ip.ip_len);
935 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
936 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
937 			mtu -= (icp->icmp_ip.ip_hl << 2);
938 
939 		/* If we still can't guess a value, try the route */
940 
941 		if (mtu == 0) {
942 			mtu = rt->rt_rmx.rmx_mtu;
943 
944 			/* If no route mtu, default to the interface mtu */
945 
946 			if (mtu == 0)
947 				mtu = rt->rt_ifp->if_mtu;
948 		}
949 
950 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
951 			if (mtu > mtu_table[i]) {
952 				mtu = mtu_table[i];
953 				break;
954 			}
955 	}
956 
957 	/*
958 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
959 	 *	  to turn off PMTU for a route, and the kernel can
960 	 *	  set it to indicate a serious problem with PMTU
961 	 *	  on a route.  We should be using a separate flag
962 	 *	  for the kernel to indicate this.
963 	 */
964 
965 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
966 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
967 			rt->rt_rmx.rmx_locks |= RTV_MTU;
968 		else if (rt->rt_rmx.rmx_mtu > mtu ||
969 		    rt->rt_rmx.rmx_mtu == 0)
970 			rt->rt_rmx.rmx_mtu = mtu;
971 	}
972 
973 	rtfree(rt);
974 }
975 
976 /* XXX only handles table 0 right now */
977 void
978 icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
979 {
980 	if (rt == NULL)
981 		panic("icmp_mtudisc_timeout:  bad route to timeout");
982 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
983 	    (RTF_DYNAMIC | RTF_HOST)) {
984 		void *(*ctlfunc)(int, struct sockaddr *, void *);
985 		extern u_char ip_protox[];
986 		struct sockaddr_in sa;
987 		struct rt_addrinfo info;
988 
989 		bzero(&info, sizeof(info));
990 		info.rti_info[RTAX_DST] = rt_key(rt);
991 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
992 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
993 		info.rti_flags = rt->rt_flags;
994 
995 		sa = *(struct sockaddr_in *)rt_key(rt);
996 		rtrequest1(RTM_DELETE, &info, rt->rt_priority, NULL, 0);
997 
998 		/* Notify TCP layer of increased Path MTU estimate */
999 		ctlfunc = inetsw[ip_protox[IPPROTO_TCP]].pr_ctlinput;
1000 		if (ctlfunc)
1001 			(*ctlfunc)(PRC_MTUINC,(struct sockaddr *)&sa, NULL);
1002 	} else
1003 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
1004 			rt->rt_rmx.rmx_mtu = 0;
1005 }
1006 
1007 /*
1008  * Perform rate limit check.
1009  * Returns 0 if it is okay to send the icmp packet.
1010  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1011  * limitation.
1012  *
1013  * XXX per-destination/type check necessary?
1014  */
1015 int
1016 icmp_ratelimit(const struct in_addr *dst, const int type, const int code)
1017 {
1018 
1019 	/* PPS limit */
1020 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1021 	    icmperrppslim))
1022 		return 1;
1023 
1024 	/*okay to send*/
1025 	return 0;
1026 }
1027 
1028 /* XXX only handles table 0 right now */
1029 void
1030 icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r)
1031 {
1032 	if (rt == NULL)
1033 		panic("icmp_redirect_timeout:  bad route to timeout");
1034 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1035 	    (RTF_DYNAMIC | RTF_HOST)) {
1036 		struct rt_addrinfo info;
1037 
1038 		bzero(&info, sizeof(info));
1039 		info.rti_info[RTAX_DST] = rt_key(rt);
1040 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1041 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1042 		info.rti_flags = rt->rt_flags;
1043 
1044 		rtrequest1(RTM_DELETE, &info, rt->rt_priority, NULL, 0);
1045 	}
1046 }
1047