xref: /dragonfly/sys/netinet/ip_icmp.c (revision 10f4bf95)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
34  * $FreeBSD: src/sys/netinet/ip_icmp.c,v 1.39.2.19 2003/01/24 05:11:34 sam Exp $
35  * $DragonFly: src/sys/netinet/ip_icmp.c,v 1.32 2008/10/27 02:56:30 sephe Exp $
36  */
37 
38 #include "opt_ipsec.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/mbuf.h>
43 #include <sys/protosw.h>
44 #include <sys/socket.h>
45 #include <sys/socketops.h>
46 #include <sys/time.h>
47 #include <sys/kernel.h>
48 #include <sys/sysctl.h>
49 #include <sys/in_cksum.h>
50 
51 #include <machine/stdarg.h>
52 
53 #include <net/if.h>
54 #include <net/if_types.h>
55 #include <net/route.h>
56 
57 #define _IP_VHL
58 #include <netinet/in.h>
59 #include <netinet/in_systm.h>
60 #include <netinet/in_var.h>
61 #include <netinet/ip.h>
62 #include <netinet/ip_icmp.h>
63 #include <netinet/ip_var.h>
64 #include <netinet/icmp_var.h>
65 
66 #ifdef IPSEC
67 #include <netinet6/ipsec.h>
68 #include <netproto/key/key.h>
69 #endif
70 
71 #ifdef FAST_IPSEC
72 #include <netproto/ipsec/ipsec.h>
73 #include <netproto/ipsec/key.h>
74 #define	IPSEC
75 #endif
76 
/*
 * ICMP routines: error generation, processing of received packets,
 * routines that turn packets around and send them back to the
 * originator, and host table maintenance routines.
 */
82 
83 struct icmpstat icmpstat;
84 SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW,
85 	&icmpstat, icmpstat, "ICMP statistics");
86 
87 static int	icmpmaskrepl = 0;
88 SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW,
89 	&icmpmaskrepl, 0, "Allow replies to netmask requests");
90 
91 static int	drop_redirect = 0;
92 SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
93 	&drop_redirect, 0, "Ignore ICMP redirects");
94 
95 static int	log_redirect = 0;
96 SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW,
97 	&log_redirect, 0, "Enable output about ICMP redirects");
98 
99 #ifdef ICMP_BANDLIM
100 
101 /*
102  * ICMP error-response bandwidth limiting sysctl.  If not enabled, sysctl
103  *      variable content is -1 and read-only.
104  */
105 
106 static int      icmplim = 200;
107 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW,
108 	&icmplim, 0, "ICMP bandwidth limit");
109 #else
110 
111 static int      icmplim = -1;
112 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD,
113 	&icmplim, 0, "ICMP bandwidth limit");
114 
115 #endif
116 
117 static int	icmplim_output = 1;
118 SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW,
119 	&icmplim_output, 0, "Enable output about ICMP bandwidth limits");
120 
121 /*
122  * ICMP broadcast echo sysctl
123  */
124 
125 static int	icmpbmcastecho = 0;
126 SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW,
127     &icmpbmcastecho, 0, "");
128 
129 static char	icmp_reply_src[IFNAMSIZ];
130 SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW,
131 	icmp_reply_src, IFNAMSIZ, "icmp reply source for non-local packets.");
132 
133 static int	icmp_rfi;
134 SYSCTL_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_RW,
135 	&icmp_rfi, 0, "ICMP reply from incoming interface for "
136 	"non-local packets");
137 
138 #ifdef ICMPPRINTFS
139 int	icmpprintfs = 0;
140 #endif
141 
142 static void	icmp_reflect (struct mbuf *);
143 static void	icmp_send (struct mbuf *, struct mbuf *, struct route *);
144 
145 extern	struct protosw inetsw[];
146 
147 /*
148  * Generate an error packet of type error
149  * in response to bad packet ip.
150  */
151 void
152 icmp_error(struct mbuf *n, int type, int code, n_long dest, int destmtu)
153 {
154 	struct ip *oip = mtod(n, struct ip *), *nip;
155 	unsigned oiplen = IP_VHL_HL(oip->ip_vhl) << 2;
156 	struct icmp *icp;
157 	struct mbuf *m;
158 	unsigned icmplen;
159 
160 #ifdef ICMPPRINTFS
161 	if (icmpprintfs)
162 		kprintf("icmp_error(%p, %d, %d)\n", oip, type, code);
163 #endif
164 	if (type != ICMP_REDIRECT)
165 		icmpstat.icps_error++;
166 	/*
167 	 * Don't send error if the original packet was encrypted.
168 	 * Don't send error if not the first fragment of message.
169 	 * Don't error if the old packet protocol was ICMP
170 	 * error message, only known informational types.
171 	 */
172 	if (n->m_flags & M_DECRYPTED)
173 		goto freeit;
174 	if (oip->ip_off &~ (IP_MF|IP_DF))
175 		goto freeit;
176 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
177 	  n->m_len >= oiplen + ICMP_MINLEN &&
178 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
179 		icmpstat.icps_oldicmp++;
180 		goto freeit;
181 	}
182 	/* Don't send error in response to a multicast or broadcast packet */
183 	if (n->m_flags & (M_BCAST|M_MCAST))
184 		goto freeit;
185 	/*
186 	 * First, formulate icmp message
187 	 */
188 	m = m_gethdr(MB_DONTWAIT, MT_HEADER);
189 	if (m == NULL)
190 		goto freeit;
191 	icmplen = min(oiplen + 8, oip->ip_len);
192 	if (icmplen < sizeof(struct ip))
193 		panic("icmp_error: bad length");
194 	m->m_len = icmplen + ICMP_MINLEN;
195 	MH_ALIGN(m, m->m_len);
196 	icp = mtod(m, struct icmp *);
197 	if ((u_int)type > ICMP_MAXTYPE)
198 		panic("icmp_error");
199 	icmpstat.icps_outhist[type]++;
200 	icp->icmp_type = type;
201 	if (type == ICMP_REDIRECT)
202 		icp->icmp_gwaddr.s_addr = dest;
203 	else {
204 		icp->icmp_void = 0;
205 		/*
206 		 * The following assignments assume an overlay with the
207 		 * zeroed icmp_void field.
208 		 */
209 		if (type == ICMP_PARAMPROB) {
210 			icp->icmp_pptr = code;
211 			code = 0;
212 		} else if (type == ICMP_UNREACH &&
213 			code == ICMP_UNREACH_NEEDFRAG && destmtu) {
214 			icp->icmp_nextmtu = htons(destmtu);
215 		}
216 	}
217 
218 	icp->icmp_code = code;
219 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
220 	nip = &icp->icmp_ip;
221 
222 	/*
223 	 * Convert fields to network representation.
224 	 */
225 	nip->ip_len = htons(nip->ip_len);
226 	nip->ip_off = htons(nip->ip_off);
227 
228 	/*
229 	 * Now, copy old ip header (without options)
230 	 * in front of icmp message.
231 	 */
232 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
233 		panic("icmp len");
234 	m->m_data -= sizeof(struct ip);
235 	m->m_len += sizeof(struct ip);
236 	m->m_pkthdr.len = m->m_len;
237 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
238 	nip = mtod(m, struct ip *);
239 	bcopy(oip, nip, sizeof(struct ip));
240 	nip->ip_len = m->m_len;
241 	nip->ip_vhl = IP_VHL_BORING;
242 	nip->ip_p = IPPROTO_ICMP;
243 	nip->ip_tos = 0;
244 	m->m_pkthdr.fw_flags |= n->m_pkthdr.fw_flags & FW_MBUF_GENERATED;
245 	icmp_reflect(m);
246 
247 freeit:
248 	m_freem(n);
249 }
250 
251 /*
252  * Process a received ICMP message.
253  */
254 int
255 icmp_input(struct mbuf **mp, int *offp, int proto)
256 {
257 	struct sockaddr_in icmpsrc = { sizeof(struct sockaddr_in), AF_INET };
258 	struct sockaddr_in icmpdst = { sizeof(struct sockaddr_in), AF_INET };
259 	struct sockaddr_in icmpgw = { sizeof(struct sockaddr_in), AF_INET };
260 	struct icmp *icp;
261 	struct in_ifaddr *ia;
262 	struct mbuf *m = *mp;
263 	struct ip *ip = mtod(m, struct ip *);
264 	int icmplen = ip->ip_len;
265 	int i, hlen;
266 	int code;
267 
268 	*mp = NULL;
269 	hlen = *offp;
270 
271 	/*
272 	 * Locate icmp structure in mbuf, and check
273 	 * that not corrupted and of at least minimum length.
274 	 */
275 #ifdef ICMPPRINTFS
276 	if (icmpprintfs) {
277 		char buf[sizeof "aaa.bbb.ccc.ddd"];
278 
279 		strcpy(buf, inet_ntoa(ip->ip_src));
280 		kprintf("icmp_input from %s to %s, len %d\n",
281 		       buf, inet_ntoa(ip->ip_dst), icmplen);
282 	}
283 #endif
284 	if (icmplen < ICMP_MINLEN) {
285 		icmpstat.icps_tooshort++;
286 		goto freeit;
287 	}
288 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
289 	if (m->m_len < i && (m = m_pullup(m, i)) == 0)  {
290 		icmpstat.icps_tooshort++;
291 		return(IPPROTO_DONE);
292 	}
293 	ip = mtod(m, struct ip *);
294 	m->m_len -= hlen;
295 	m->m_data += hlen;
296 	icp = mtod(m, struct icmp *);
297 	if (in_cksum(m, icmplen)) {
298 		icmpstat.icps_checksum++;
299 		goto freeit;
300 	}
301 	m->m_len += hlen;
302 	m->m_data -= hlen;
303 
304 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
305 		/*
306 		 * Deliver very specific ICMP type only.
307 		 */
308 		switch (icp->icmp_type) {
309 		case ICMP_UNREACH:
310 		case ICMP_TIMXCEED:
311 			break;
312 		default:
313 			goto freeit;
314 		}
315 	}
316 
317 #ifdef ICMPPRINTFS
318 	if (icmpprintfs)
319 		kprintf("icmp_input, type %d code %d\n", icp->icmp_type,
320 		    icp->icmp_code);
321 #endif
322 
323 	/*
324 	 * Message type specific processing.
325 	 */
326 	if (icp->icmp_type > ICMP_MAXTYPE)
327 		goto raw;
328 	icmpstat.icps_inhist[icp->icmp_type]++;
329 	code = icp->icmp_code;
330 	switch (icp->icmp_type) {
331 
332 	case ICMP_UNREACH:
333 		switch (code) {
334 			case ICMP_UNREACH_NET:
335 			case ICMP_UNREACH_HOST:
336 			case ICMP_UNREACH_SRCFAIL:
337 			case ICMP_UNREACH_NET_UNKNOWN:
338 			case ICMP_UNREACH_HOST_UNKNOWN:
339 			case ICMP_UNREACH_ISOLATED:
340 			case ICMP_UNREACH_TOSNET:
341 			case ICMP_UNREACH_TOSHOST:
342 			case ICMP_UNREACH_HOST_PRECEDENCE:
343 			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
344 				code = PRC_UNREACH_NET;
345 				break;
346 
347 			case ICMP_UNREACH_NEEDFRAG:
348 				code = PRC_MSGSIZE;
349 				break;
350 
351 			/*
352 			 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
353 			 * Treat subcodes 2,3 as immediate RST
354 			 */
355 			case ICMP_UNREACH_PROTOCOL:
356 			case ICMP_UNREACH_PORT:
357 				code = PRC_UNREACH_PORT;
358 				break;
359 
360 			case ICMP_UNREACH_NET_PROHIB:
361 			case ICMP_UNREACH_HOST_PROHIB:
362 			case ICMP_UNREACH_FILTER_PROHIB:
363 				code = PRC_UNREACH_ADMIN_PROHIB;
364 				break;
365 
366 			default:
367 				goto badcode;
368 		}
369 		goto deliver;
370 
371 	case ICMP_TIMXCEED:
372 		if (code > 1)
373 			goto badcode;
374 		code += PRC_TIMXCEED_INTRANS;
375 		goto deliver;
376 
377 	case ICMP_PARAMPROB:
378 		if (code > 1)
379 			goto badcode;
380 		code = PRC_PARAMPROB;
381 		goto deliver;
382 
383 	case ICMP_SOURCEQUENCH:
384 		if (code)
385 			goto badcode;
386 		code = PRC_QUENCH;
387 deliver:
388 		/*
389 		 * Problem with datagram; advise higher level routines.
390 		 */
391 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
392 		    IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
393 			icmpstat.icps_badlen++;
394 			goto freeit;
395 		}
396 		icp->icmp_ip.ip_len = ntohs(icp->icmp_ip.ip_len);
397 		/* Discard ICMP's in response to multicast packets */
398 		if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr)))
399 			goto badcode;
400 #ifdef ICMPPRINTFS
401 		if (icmpprintfs)
402 			kprintf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
403 #endif
404 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
405 #if 1
406 		/*
407 		 * MTU discovery:
408 		 * If we got a needfrag and there is a host route to the
409 		 * original destination, and the MTU is not locked, then
410 		 * set the MTU in the route to the suggested new value
411 		 * (if given) and then notify as usual.  The ULPs will
412 		 * notice that the MTU has changed and adapt accordingly.
413 		 * If no new MTU was suggested, then we guess a new one
414 		 * less than the current value.  If the new MTU is
415 		 * unreasonably small (arbitrarily set at 296), then
416 		 * we reset the MTU to the interface value and enable the
417 		 * lock bit, indicating that we are no longer doing MTU
418 		 * discovery.
419 		 */
420 		if (code == PRC_MSGSIZE) {
421 			struct rtentry *rt;
422 			int mtu;
423 
424 			rt = rtpurelookup((struct sockaddr *)&icmpsrc);
425 			if (rt != NULL && (rt->rt_flags & RTF_HOST) &&
426 			    !(rt->rt_rmx.rmx_locks & RTV_MTU)) {
427 				mtu = ntohs(icp->icmp_nextmtu);
428 				if (!mtu)
429 					mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu,
430 							  1);
431 #ifdef DEBUG_MTUDISC
432 				kprintf("MTU for %s reduced to %d\n",
433 					inet_ntoa(icmpsrc.sin_addr), mtu);
434 #endif
435 				if (mtu < 296) {
436 					/* rt->rt_rmx.rmx_mtu =
437 						rt->rt_ifp->if_mtu; */
438 					rt->rt_rmx.rmx_locks |= RTV_MTU;
439 				} else if (rt->rt_rmx.rmx_mtu > mtu) {
440 					rt->rt_rmx.rmx_mtu = mtu;
441 				}
442 			}
443 			if (rt != NULL)
444 				--rt->rt_refcnt;
445 		}
446 #endif
447 		/*
448 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
449 		 * notification to TCP layer.
450 		 */
451 		so_pru_ctlinput(
452 			&inetsw[ip_protox[icp->icmp_ip.ip_p]],
453 			code, (struct sockaddr *)&icmpsrc, &icp->icmp_ip);
454 		break;
455 
456 badcode:
457 		icmpstat.icps_badcode++;
458 		break;
459 
460 	case ICMP_ECHO:
461 		if (!icmpbmcastecho
462 		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
463 			icmpstat.icps_bmcastecho++;
464 			break;
465 		}
466 		icp->icmp_type = ICMP_ECHOREPLY;
467 #ifdef ICMP_BANDLIM
468 		if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0)
469 			goto freeit;
470 		else
471 #endif
472 			goto reflect;
473 
474 	case ICMP_TSTAMP:
475 		if (!icmpbmcastecho
476 		    && (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
477 			icmpstat.icps_bmcasttstamp++;
478 			break;
479 		}
480 		if (icmplen < ICMP_TSLEN) {
481 			icmpstat.icps_badlen++;
482 			break;
483 		}
484 		icp->icmp_type = ICMP_TSTAMPREPLY;
485 		icp->icmp_rtime = iptime();
486 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
487 #ifdef ICMP_BANDLIM
488 		if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0)
489 			goto freeit;
490 		else
491 #endif
492 			goto reflect;
493 
494 	case ICMP_MASKREQ:
495 		if (icmpmaskrepl == 0)
496 			break;
497 		/*
498 		 * We are not able to respond with all ones broadcast
499 		 * unless we receive it over a point-to-point interface.
500 		 */
501 		if (icmplen < ICMP_MASKLEN)
502 			break;
503 		switch (ip->ip_dst.s_addr) {
504 
505 		case INADDR_BROADCAST:
506 		case INADDR_ANY:
507 			icmpdst.sin_addr = ip->ip_src;
508 			break;
509 
510 		default:
511 			icmpdst.sin_addr = ip->ip_dst;
512 		}
513 		ia = (struct in_ifaddr *)ifaof_ifpforaddr(
514 			    (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif);
515 		if (ia == 0)
516 			break;
517 		if (ia->ia_ifp == 0)
518 			break;
519 		icp->icmp_type = ICMP_MASKREPLY;
520 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
521 		if (ip->ip_src.s_addr == 0) {
522 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
523 			    ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr;
524 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
525 			    ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr;
526 		}
527 reflect:
528 		ip->ip_len += hlen;	/* since ip_input deducts this */
529 		icmpstat.icps_reflect++;
530 		icmpstat.icps_outhist[icp->icmp_type]++;
531 		icmp_reflect(m);
532 		return(IPPROTO_DONE);
533 
534 	case ICMP_REDIRECT:
535 		if (log_redirect) {
536 			u_long src, dst, gw;
537 
538 			src = ntohl(ip->ip_src.s_addr);
539 			dst = ntohl(icp->icmp_ip.ip_dst.s_addr);
540 			gw = ntohl(icp->icmp_gwaddr.s_addr);
541 			kprintf("icmp redirect from %d.%d.%d.%d: "
542 			       "%d.%d.%d.%d => %d.%d.%d.%d\n",
543 			       (int)(src >> 24), (int)((src >> 16) & 0xff),
544 			       (int)((src >> 8) & 0xff), (int)(src & 0xff),
545 			       (int)(dst >> 24), (int)((dst >> 16) & 0xff),
546 			       (int)((dst >> 8) & 0xff), (int)(dst & 0xff),
547 			       (int)(gw >> 24), (int)((gw >> 16) & 0xff),
548 			       (int)((gw >> 8) & 0xff), (int)(gw & 0xff));
549 		}
550 		if (drop_redirect)
551 			break;
552 		if (code > 3)
553 			goto badcode;
554 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
555 		    IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) {
556 			icmpstat.icps_badlen++;
557 			break;
558 		}
559 		/*
560 		 * Short circuit routing redirects to force
561 		 * immediate change in the kernel's routing
562 		 * tables.  The message is also handed to anyone
563 		 * listening on a raw socket (e.g. the routing
564 		 * daemon for use in updating its tables).
565 		 */
566 		icmpgw.sin_addr = ip->ip_src;
567 		icmpdst.sin_addr = icp->icmp_gwaddr;
568 #ifdef	ICMPPRINTFS
569 		if (icmpprintfs) {
570 			char buf[sizeof "aaa.bbb.ccc.ddd"];
571 
572 			strcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst));
573 			kprintf("redirect dst %s to %s\n",
574 			       buf, inet_ntoa(icp->icmp_gwaddr));
575 		}
576 #endif
577 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
578 		rtredirect((struct sockaddr *)&icmpsrc,
579 		  (struct sockaddr *)&icmpdst,
580 		  NULL, RTF_GATEWAY | RTF_HOST,
581 		  (struct sockaddr *)&icmpgw);
582 		kpfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc);
583 #ifdef IPSEC
584 		key_sa_routechange((struct sockaddr *)&icmpsrc);
585 #endif
586 		break;
587 
588 	/*
589 	 * No kernel processing for the following;
590 	 * just fall through to send to raw listener.
591 	 */
592 	case ICMP_ECHOREPLY:
593 	case ICMP_ROUTERADVERT:
594 	case ICMP_ROUTERSOLICIT:
595 	case ICMP_TSTAMPREPLY:
596 	case ICMP_IREQREPLY:
597 	case ICMP_MASKREPLY:
598 	default:
599 		break;
600 	}
601 
602 raw:
603 	*mp = m;
604 	rip_input(mp, offp, proto);
605 	return(IPPROTO_DONE);
606 
607 freeit:
608 	m_freem(m);
609 	return(IPPROTO_DONE);
610 }
611 
612 /*
613  * Reflect the ip packet back to the source
614  */
615 static void
616 icmp_reflect(struct mbuf *m)
617 {
618 	struct ip *ip = mtod(m, struct ip *);
619 	struct in_ifaddr *ia;
620 	struct in_ifaddr_container *iac;
621 	struct ifaddr_container *ifac;
622 	struct ifnet *ifp;
623 	struct in_addr t;
624 	struct mbuf *opts = 0;
625 	int optlen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip);
626 	struct route *ro = NULL, rt;
627 
628 	if (!in_canforward(ip->ip_src) &&
629 	    ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) !=
630 	     (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
631 		m_freem(m);	/* Bad return address */
632 		icmpstat.icps_badaddr++;
633 		goto done;	/* Ip_output() will check for broadcast */
634 	}
635 	t = ip->ip_dst;
636 	ip->ip_dst = ip->ip_src;
637 
638 	ro = &rt;
639 	bzero(ro, sizeof *ro);
640 
641 	/*
642 	 * If the incoming packet was addressed directly to us,
643 	 * use dst as the src for the reply.  Otherwise (broadcast
644 	 * or anonymous), use the address which corresponds
645 	 * to the incoming interface.
646 	 */
647 	ia = NULL;
648 	LIST_FOREACH(iac, INADDR_HASH(t.s_addr), ia_hash) {
649 		if (t.s_addr == IA_SIN(iac->ia)->sin_addr.s_addr) {
650 			ia = iac->ia;
651 			goto match;
652 		}
653 	}
654 	ifp = m->m_pkthdr.rcvif;
655 	if (ifp != NULL && (ifp->if_flags & IFF_BROADCAST)) {
656 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
657 			struct ifaddr *ifa = ifac->ifa;
658 
659 			if (ifa->ifa_addr->sa_family != AF_INET)
660 				continue;
661 			ia = ifatoia(ifa);
662 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
663 			    t.s_addr)
664 				goto match;
665 		}
666 	}
667 	/*
668 	 * If the packet was transiting through us, use the address of
669 	 * the interface the packet came through in.  If that interface
670 	 * doesn't have a suitable IP address, the normal selection
671 	 * criteria apply.
672 	 */
673 	if (icmp_rfi && ifp != NULL) {
674 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
675 			struct ifaddr *ifa = ifac->ifa;
676 
677 			if (ifa->ifa_addr->sa_family != AF_INET)
678 				continue;
679 			ia = ifatoia(ifa);
680 			goto match;
681 		}
682 	}
683 	/*
684 	 * If the incoming packet was not addressed directly to us, use
685 	 * designated interface for icmp replies specified by sysctl
686 	 * net.inet.icmp.reply_src (default not set). Otherwise continue
687 	 * with normal source selection.
688 	 */
689 	if (icmp_reply_src[0] != '\0' && (ifp = ifunit(icmp_reply_src))) {
690 		TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
691 			struct ifaddr *ifa = ifac->ifa;
692 
693 			if (ifa->ifa_addr->sa_family != AF_INET)
694 				continue;
695 			ia = ifatoia(ifa);
696 			goto match;
697 		}
698 	}
699 	/*
700 	 * If the packet was transiting through us, use the address of
701 	 * the interface that is the closest to the packet source.
702 	 * When we don't have a route back to the packet source, stop here
703 	 * and drop the packet.
704 	 */
705 	ia = ip_rtaddr(ip->ip_dst, ro);
706 	if (ia == NULL) {
707 		m_freem(m);
708 		icmpstat.icps_noroute++;
709 		goto done;
710 	}
711 match:
712 	t = IA_SIN(ia)->sin_addr;
713 	ip->ip_src = t;
714 	ip->ip_ttl = ip_defttl;
715 
716 	if (optlen > 0) {
717 		u_char *cp;
718 		int opt, cnt;
719 		u_int len;
720 
721 		/*
722 		 * Retrieve any source routing from the incoming packet;
723 		 * add on any record-route or timestamp options.
724 		 */
725 		cp = (u_char *) (ip + 1);
726 		if ((opts = ip_srcroute(m)) == 0 &&
727 		    (opts = m_gethdr(MB_DONTWAIT, MT_HEADER))) {
728 			opts->m_len = sizeof(struct in_addr);
729 			mtod(opts, struct in_addr *)->s_addr = 0;
730 		}
731 		if (opts) {
732 #ifdef ICMPPRINTFS
733 			if (icmpprintfs)
734 				kprintf("icmp_reflect optlen %d rt %d => ",
735 				       optlen, opts->m_len);
736 #endif
737 			for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
738 				opt = cp[IPOPT_OPTVAL];
739 				if (opt == IPOPT_EOL)
740 					break;
741 				if (opt == IPOPT_NOP)
742 					len = 1;
743 				else {
744 					if (cnt < IPOPT_OLEN + sizeof *cp)
745 						break;
746 					len = cp[IPOPT_OLEN];
747 					if (len < IPOPT_OLEN + sizeof *cp ||
748 					    len > cnt)
749 					break;
750 				}
751 				/*
752 				 * Should check for overflow, but it
753 				 * "can't happen".
754 				 */
755 				if (opt == IPOPT_RR || opt == IPOPT_TS ||
756 				    opt == IPOPT_SECURITY) {
757 					bcopy(cp,
758 					      mtod(opts, caddr_t) + opts->m_len,
759 					      len);
760 					opts->m_len += len;
761 				}
762 			}
763 			/* Terminate & pad, if necessary */
764 			cnt = opts->m_len % 4;
765 			if (cnt) {
766 				for (; cnt < 4; cnt++) {
767 					*(mtod(opts, caddr_t) + opts->m_len) =
768 					    IPOPT_EOL;
769 					opts->m_len++;
770 				}
771 			}
772 #ifdef ICMPPRINTFS
773 			if (icmpprintfs)
774 				kprintf("%d\n", opts->m_len);
775 #endif
776 		}
777 		/*
778 		 * Now strip out original options by copying rest of first
779 		 * mbuf's data back, and adjust the IP length.
780 		 */
781 		ip->ip_len -= optlen;
782 		ip->ip_vhl = IP_VHL_BORING;
783 		m->m_len -= optlen;
784 		if (m->m_flags & M_PKTHDR)
785 			m->m_pkthdr.len -= optlen;
786 		optlen += sizeof(struct ip);
787 		bcopy((caddr_t)ip + optlen, ip + 1,
788 		      m->m_len - sizeof(struct ip));
789 	}
790 	m->m_pkthdr.fw_flags &= FW_MBUF_GENERATED;
791 	m->m_flags &= ~(M_BCAST|M_MCAST);
792 	icmp_send(m, opts, ro);
793 done:
794 	if (opts)
795 		m_free(opts);
796 	if (ro && ro->ro_rt)
797 		RTFREE(ro->ro_rt);
798 }
799 
800 /*
801  * Send an icmp packet back to the ip level,
802  * after supplying a checksum.
803  */
804 static void
805 icmp_send(struct mbuf *m, struct mbuf *opts, struct route *rt)
806 {
807 	struct ip *ip = mtod(m, struct ip *);
808 	struct icmp *icp;
809 	int hlen;
810 
811 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
812 	m->m_data += hlen;
813 	m->m_len -= hlen;
814 	icp = mtod(m, struct icmp *);
815 	icp->icmp_cksum = 0;
816 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
817 	m->m_data -= hlen;
818 	m->m_len += hlen;
819 	m->m_pkthdr.rcvif = NULL;
820 #ifdef ICMPPRINTFS
821 	if (icmpprintfs) {
822 		char buf[sizeof "aaa.bbb.ccc.ddd"];
823 
824 		strcpy(buf, inet_ntoa(ip->ip_dst));
825 		kprintf("icmp_send dst %s src %s\n", buf, inet_ntoa(ip->ip_src));
826 	}
827 #endif
828 	ip_output(m, opts, rt, 0, NULL, NULL);
829 }
830 
831 n_time
832 iptime(void)
833 {
834 	struct timeval atv;
835 	u_long t;
836 
837 	getmicrotime(&atv);
838 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
839 	return (htonl(t));
840 }
841 
842 #if 1
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * Returns 0 when there is no plateau in the requested direction
 * (MTU already at or above the largest plateau when growing, or at
 * or below the smallest one when shrinking) and for a negative MTU.
 */
int
ip_next_mtu(int mtu, int dir)
{
	/* Descending plateau table, terminated by a 0 sentinel. */
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int ntab = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
	int i;

	/*
	 * A negative MTU would never match the 0 sentinel and the
	 * search below would run off the end of the table.
	 */
	if (mtu < 0)
		return 0;

	/* Find the first plateau that is not larger than mtu. */
	for (i = 0; i < ntab; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	if (dir < 0) {
		/* Growing: the previous entry, if there is one. */
		if (i == 0)
			return 0;
		return mtutab[i - 1];
	}

	/* Shrinking. */
	if (mtutab[i] == 0)
		return 0;		/* already below the smallest plateau */
	if (mtu > mtutab[i])
		return mtutab[i];	/* between plateaus: round down */
	return mtutab[i + 1];		/* exactly on a plateau: step down */
}
878 #endif
879 
880 #ifdef ICMP_BANDLIM
881 /*
882  * badport_bandlim() - check for ICMP bandwidth limit
883  *
884  *	Return 0 if it is ok to send an ICMP error response, -1 if we have
885  *	hit our bandwidth limit and it is not ok.
886  *
887  *	If icmplim is <= 0, the feature is disabled and 0 is returned.
888  *
889  *	For now we separate the TCP and UDP subsystems w/ different 'which'
890  *	values.  We may eventually remove this separation (and simplify the
891  *	code further).
892  *
893  *	Note that the printing of the error message is delayed so we can
894  *	properly print the icmp error rate that the system was trying to do
895  *	(i.e. 22000/100 pps, etc...).  This can cause long delays in printing
896  *	the 'final' error, but it doesn't make sense to solve the printing
897  *	delay with more complex code.
898  */
899 int
900 badport_bandlim(int which)
901 {
902 	static int lticks[BANDLIM_MAX + 1];
903 	static int lpackets[BANDLIM_MAX + 1];
904 	int dticks;
905 	const char *bandlimittype[] = {
906 		"Limiting icmp unreach response",
907 		"Limiting icmp ping response",
908 		"Limiting icmp tstamp response",
909 		"Limiting closed port RST response",
910 		"Limiting open port RST response"
911 		};
912 
913 	/*
914 	 * Return ok status if feature disabled or argument out of
915 	 * ranage.
916 	 */
917 
918 	if (icmplim <= 0 || which > BANDLIM_MAX || which < 0)
919 		return(0);
920 	dticks = ticks - lticks[which];
921 
922 	/*
923 	 * reset stats when cumulative dt exceeds one second.
924 	 */
925 
926 	if ((unsigned int)dticks > hz) {
927 		if (lpackets[which] > icmplim && icmplim_output) {
928 			kprintf("%s from %d to %d packets per second\n",
929 				bandlimittype[which],
930 				lpackets[which],
931 				icmplim
932 			);
933 		}
934 		lticks[which] = ticks;
935 		lpackets[which] = 0;
936 	}
937 
938 	/*
939 	 * bump packet count
940 	 */
941 
942 	if (++lpackets[which] > icmplim) {
943 		return(-1);
944 	}
945 	return(0);
946 }
947 #endif
948