xref: /openbsd/sys/netinet/ip_icmp.c (revision 898184e3)
1 /*	$OpenBSD: ip_icmp.c,v 1.96 2012/09/18 12:35:51 blambert Exp $	*/
2 /*	$NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  *	This product includes software developed by the University of
46  *	California, Berkeley and its contributors.
47  *	This product includes software developed at the Information
48  *	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include "carp.h"
72 #include "pf.h"
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mbuf.h>
77 #include <sys/protosw.h>
78 #include <sys/socket.h>
79 #include <sys/proc.h>
80 #include <sys/sysctl.h>
81 
82 #include <net/if.h>
83 #include <net/route.h>
84 
85 #include <netinet/in.h>
86 #include <netinet/in_systm.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip_icmp.h>
90 #include <netinet/ip_var.h>
91 #include <netinet/icmp_var.h>
92 
93 #if NCARP > 0
94 #include <net/if_types.h>
95 #include <netinet/ip_carp.h>
96 #endif
97 
98 #if NPF > 0
99 #include <net/pfvar.h>
100 #endif
101 
/*
 * ICMP routines: error generation, processing of received packets,
 * routines to turn packets around and send them back to the originator,
 * and host table maintenance routines.
 */
107 
108 int	icmpmaskrepl = 0;
109 int	icmpbmcastecho = 0;
110 int	icmptstamprepl = 1;
111 #ifdef ICMPPRINTFS
112 int	icmpprintfs = 0;
113 #endif
114 int	icmperrppslim = 100;
115 int	icmperrpps_count = 0;
116 struct timeval icmperrppslim_last;
117 int	icmp_rediraccept = 0;
118 int	icmp_redirtimeout = 10 * 60;
119 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
120 struct	icmpstat icmpstat;
121 
122 int *icmpctl_vars[ICMPCTL_MAXID] = ICMPCTL_VARS;
123 
124 void icmp_mtudisc_timeout(struct rtentry *, struct rttimer *);
125 int icmp_ratelimit(const struct in_addr *, const int, const int);
126 void icmp_redirect_timeout(struct rtentry *, struct rttimer *);
127 
128 extern	struct protosw inetsw[];
129 
130 void
131 icmp_init(void)
132 {
133 	/*
134 	 * This is only useful if the user initializes redirtimeout to
135 	 * something other than zero.
136 	 */
137 	if (icmp_redirtimeout != 0) {
138 		icmp_redirect_timeout_q =
139 		    rt_timer_queue_create(icmp_redirtimeout);
140 	}
141 }
142 
143 struct mbuf *
144 icmp_do_error(struct mbuf *n, int type, int code, n_long dest, int destmtu)
145 {
146 	struct ip *oip = mtod(n, struct ip *), *nip;
147 	unsigned oiplen = oip->ip_hl << 2;
148 	struct icmp *icp;
149 	struct mbuf *m;
150 	unsigned icmplen, mblen;
151 
152 #ifdef ICMPPRINTFS
153 	if (icmpprintfs)
154 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
155 #endif
156 	if (type != ICMP_REDIRECT)
157 		icmpstat.icps_error++;
158 	/*
159 	 * Don't send error if not the first fragment of message.
160 	 * Don't error if the old packet protocol was ICMP
161 	 * error message, only known informational types.
162 	 */
163 	if (oip->ip_off & htons(IP_OFFMASK))
164 		goto freeit;
165 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
166 	    n->m_len >= oiplen + ICMP_MINLEN &&
167 	    !ICMP_INFOTYPE(((struct icmp *)
168 	    ((caddr_t)oip + oiplen))->icmp_type)) {
169 		icmpstat.icps_oldicmp++;
170 		goto freeit;
171 	}
172 	/* Don't send error in response to a multicast or broadcast packet */
173 	if (n->m_flags & (M_BCAST|M_MCAST))
174 		goto freeit;
175 
176 	/*
177 	 * First, do a rate limitation check.
178 	 */
179 	if (icmp_ratelimit(&oip->ip_src, type, code))
180 		goto freeit;	/* XXX stat */
181 
182 	/*
183 	 * Now, formulate icmp message
184 	 */
185 	icmplen = oiplen + min(8, ntohs(oip->ip_len));
186 	/*
187 	 * Defend against mbuf chains shorter than oip->ip_len:
188 	 */
189 	mblen = 0;
190 	for (m = n; m && (mblen < icmplen); m = m->m_next)
191 		mblen += m->m_len;
192 	icmplen = min(mblen, icmplen);
193 
194 	/*
195 	 * As we are not required to return everything we have,
196 	 * we return whatever we can return at ease.
197 	 *
198 	 * Note that ICMP datagrams longer than 576 octets are out of spec
199 	 * according to RFC1812;
200 	 */
201 
202 	KASSERT(ICMP_MINLEN <= MCLBYTES);
203 
204 	if (icmplen + ICMP_MINLEN > MCLBYTES)
205 		icmplen = MCLBYTES - ICMP_MINLEN - sizeof (struct ip);
206 
207 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
208 	if (m && (sizeof (struct ip) + icmplen + ICMP_MINLEN > MHLEN)) {
209 		MCLGET(m, M_DONTWAIT);
210 		if ((m->m_flags & M_EXT) == 0) {
211 			m_freem(m);
212 			m = NULL;
213 		}
214 	}
215 	if (m == NULL)
216 		goto freeit;
217 	/* keep in same domain and rtable (the latter is a bit unclear) */
218 	m->m_pkthdr.rdomain = n->m_pkthdr.rdomain;
219 	m->m_len = icmplen + ICMP_MINLEN;
220 	if ((m->m_flags & M_EXT) == 0)
221 		MH_ALIGN(m, m->m_len);
222 	icp = mtod(m, struct icmp *);
223 	if ((u_int)type > ICMP_MAXTYPE)
224 		panic("icmp_error");
225 	icmpstat.icps_outhist[type]++;
226 	icp->icmp_type = type;
227 	if (type == ICMP_REDIRECT)
228 		icp->icmp_gwaddr.s_addr = dest;
229 	else {
230 		icp->icmp_void = 0;
231 		/*
232 		 * The following assignments assume an overlay with the
233 		 * zeroed icmp_void field.
234 		 */
235 		if (type == ICMP_PARAMPROB) {
236 			icp->icmp_pptr = code;
237 			code = 0;
238 		} else if (type == ICMP_UNREACH &&
239 		    code == ICMP_UNREACH_NEEDFRAG && destmtu)
240 			icp->icmp_nextmtu = htons(destmtu);
241 	}
242 
243 	icp->icmp_code = code;
244 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
245 
246 	/*
247 	 * Now, copy old ip header (without options)
248 	 * in front of icmp message.
249 	 */
250 	if ((m->m_flags & M_EXT) == 0 &&
251 	    m->m_data - sizeof(struct ip) < m->m_pktdat)
252 		panic("icmp len");
253 	m->m_data -= sizeof(struct ip);
254 	m->m_len += sizeof(struct ip);
255 	m->m_pkthdr.len = m->m_len;
256 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
257 	nip = mtod(m, struct ip *);
258 	/* ip_v set in ip_output */
259 	nip->ip_hl = sizeof(struct ip) >> 2;
260 	nip->ip_tos = 0;
261 	nip->ip_len = htons(m->m_len);
262 	/* ip_id set in ip_output */
263 	nip->ip_off = 0;
264 	/* ip_ttl set in icmp_reflect */
265 	nip->ip_p = IPPROTO_ICMP;
266 	nip->ip_src = oip->ip_src;
267 	nip->ip_dst = oip->ip_dst;
268 
269 	/* move PF_GENERATED to new packet, if existent XXX preserve more? */
270 	if (n->m_pkthdr.pf.flags & PF_TAG_GENERATED)
271 		m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
272 
273 	m_freem(n);
274 	return (m);
275 
276 freeit:
277 	m_freem(n);
278 	return (NULL);
279 }
280 
281 /*
282  * Generate an error packet of type error
283  * in response to bad packet ip.
284  *
285  * The ip packet inside has ip_off and ip_len in host byte order.
286  */
287 void
288 icmp_error(struct mbuf *n, int type, int code, n_long dest, int destmtu)
289 {
290 	struct mbuf *m;
291 
292 	m = icmp_do_error(n, type, code, dest, destmtu);
293 	if (m != NULL)
294 		if (!icmp_reflect(m, NULL, NULL))
295 			icmp_send(m, NULL);
296 }
297 
298 struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
299 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
300 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
301 
302 /*
303  * Process a received ICMP message.
304  */
305 void
306 icmp_input(struct mbuf *m, ...)
307 {
308 	struct icmp *icp;
309 	struct ip *ip = mtod(m, struct ip *);
310 	int icmplen;
311 	int i;
312 	struct in_ifaddr *ia;
313 	void *(*ctlfunc)(int, struct sockaddr *, u_int, void *);
314 	int code;
315 	extern u_char ip_protox[];
316 	extern int ipforwarding;
317 	int hlen;
318 	va_list ap;
319 	struct rtentry *rt;
320 	struct mbuf *opts;
321 
322 	va_start(ap, m);
323 	hlen = va_arg(ap, int);
324 	va_end(ap);
325 
326 	/*
327 	 * Locate icmp structure in mbuf, and check
328 	 * that not corrupted and of at least minimum length.
329 	 */
330 	icmplen = ntohs(ip->ip_len) - hlen;
331 #ifdef ICMPPRINTFS
332 	if (icmpprintfs) {
333 		char buf[4 * sizeof("123")];
334 
335 		strlcpy(buf, inet_ntoa(ip->ip_dst), sizeof buf);
336 		printf("icmp_input from %s to %s, len %d\n",
337 		    inet_ntoa(ip->ip_src), buf, icmplen);
338 	}
339 #endif
340 	if (icmplen < ICMP_MINLEN) {
341 		icmpstat.icps_tooshort++;
342 		goto freeit;
343 	}
344 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
345 	if (m->m_len < i && (m = m_pullup(m, i)) == NULL) {
346 		icmpstat.icps_tooshort++;
347 		return;
348 	}
349 	ip = mtod(m, struct ip *);
350 	m->m_len -= hlen;
351 	m->m_data += hlen;
352 	icp = mtod(m, struct icmp *);
353 	if (in_cksum(m, icmplen)) {
354 		icmpstat.icps_checksum++;
355 		goto freeit;
356 	}
357 	m->m_len += hlen;
358 	m->m_data -= hlen;
359 
360 #ifdef ICMPPRINTFS
361 	/*
362 	 * Message type specific processing.
363 	 */
364 	if (icmpprintfs)
365 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
366 		    icp->icmp_code);
367 #endif
368 	if (icp->icmp_type > ICMP_MAXTYPE)
369 		goto raw;
370 	icmpstat.icps_inhist[icp->icmp_type]++;
371 	code = icp->icmp_code;
372 	switch (icp->icmp_type) {
373 
374 	case ICMP_UNREACH:
375 		switch (code) {
376 		case ICMP_UNREACH_NET:
377 		case ICMP_UNREACH_HOST:
378 		case ICMP_UNREACH_PROTOCOL:
379 		case ICMP_UNREACH_PORT:
380 		case ICMP_UNREACH_SRCFAIL:
381 			code += PRC_UNREACH_NET;
382 			break;
383 
384 		case ICMP_UNREACH_NEEDFRAG:
385 			code = PRC_MSGSIZE;
386 			break;
387 
388 		case ICMP_UNREACH_NET_UNKNOWN:
389 		case ICMP_UNREACH_NET_PROHIB:
390 		case ICMP_UNREACH_TOSNET:
391 			code = PRC_UNREACH_NET;
392 			break;
393 
394 		case ICMP_UNREACH_HOST_UNKNOWN:
395 		case ICMP_UNREACH_ISOLATED:
396 		case ICMP_UNREACH_HOST_PROHIB:
397 		case ICMP_UNREACH_TOSHOST:
398 		case ICMP_UNREACH_FILTER_PROHIB:
399 		case ICMP_UNREACH_HOST_PRECEDENCE:
400 		case ICMP_UNREACH_PRECEDENCE_CUTOFF:
401 			code = PRC_UNREACH_HOST;
402 			break;
403 
404 		default:
405 			goto badcode;
406 		}
407 		goto deliver;
408 
409 	case ICMP_TIMXCEED:
410 		if (code > 1)
411 			goto badcode;
412 		code += PRC_TIMXCEED_INTRANS;
413 		goto deliver;
414 
415 	case ICMP_PARAMPROB:
416 		if (code > 1)
417 			goto badcode;
418 		code = PRC_PARAMPROB;
419 		goto deliver;
420 
421 	case ICMP_SOURCEQUENCH:
422 		if (code)
423 			goto badcode;
424 		code = PRC_QUENCH;
425 	deliver:
426 		/* Free packet atttributes */
427 		if (m->m_flags & M_PKTHDR)
428 			m_tag_delete_chain(m);
429 
430 		/*
431 		 * Problem with datagram; advise higher level routines.
432 		 */
433 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
434 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
435 			icmpstat.icps_badlen++;
436 			goto freeit;
437 		}
438 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
439 			goto badcode;
440 #ifdef INET6
441 		/* Get more contiguous data for a v6 in v4 ICMP message. */
442 		if (icp->icmp_ip.ip_p == IPPROTO_IPV6) {
443 			if (icmplen < ICMP_V6ADVLENMIN ||
444 			    icmplen < ICMP_V6ADVLEN(icp)) {
445 				icmpstat.icps_badlen++;
446 				goto freeit;
447 			} else {
448 				if ((m = m_pullup(m, (ip->ip_hl << 2) +
449 				    ICMP_V6ADVLEN(icp))) == NULL) {
450 					icmpstat.icps_tooshort++;
451 					return;
452 				}
453 				ip = mtod(m, struct ip *);
454 				icp = (struct icmp *)
455 				    (m->m_data + (ip->ip_hl << 2));
456 			}
457 		}
458 #endif /* INET6 */
459 #ifdef ICMPPRINTFS
460 		if (icmpprintfs)
461 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
462 #endif
463 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
464 #if NCARP > 0
465 		if (m->m_pkthdr.rcvif->if_type == IFT_CARP &&
466 		    carp_lsdrop(m, AF_INET, &icmpsrc.sin_addr.s_addr,
467 		    &ip->ip_dst.s_addr))
468 			goto freeit;
469 #endif
470 		/*
471 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
472 		 * notification to TCP layer.
473 		 */
474 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
475 		if (ctlfunc)
476 			(*ctlfunc)(code, sintosa(&icmpsrc), m->m_pkthdr.rdomain,
477 			    &icp->icmp_ip);
478 		break;
479 
480 	badcode:
481 		icmpstat.icps_badcode++;
482 		break;
483 
484 	case ICMP_ECHO:
485 		if (!icmpbmcastecho &&
486 		    (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
487 			icmpstat.icps_bmcastecho++;
488 			break;
489 		}
490 		icp->icmp_type = ICMP_ECHOREPLY;
491 		goto reflect;
492 
493 	case ICMP_TSTAMP:
494 		if (icmptstamprepl == 0)
495 			break;
496 
497 		if (!icmpbmcastecho &&
498 		    (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
499 			icmpstat.icps_bmcastecho++;
500 			break;
501 		}
502 		if (icmplen < ICMP_TSLEN) {
503 			icmpstat.icps_badlen++;
504 			break;
505 		}
506 		icp->icmp_type = ICMP_TSTAMPREPLY;
507 		icp->icmp_rtime = iptime();
508 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
509 		goto reflect;
510 
511 	case ICMP_MASKREQ:
512 		if (icmpmaskrepl == 0)
513 			break;
514 		if (icmplen < ICMP_MASKLEN) {
515 			icmpstat.icps_badlen++;
516 			break;
517 		}
518 		/*
519 		 * We are not able to respond with all ones broadcast
520 		 * unless we receive it over a point-to-point interface.
521 		 */
522 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
523 		    ip->ip_dst.s_addr == INADDR_ANY)
524 			icmpdst.sin_addr = ip->ip_src;
525 		else
526 			icmpdst.sin_addr = ip->ip_dst;
527 		if (m->m_pkthdr.rcvif == NULL)
528 			break;
529 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
530 		    m->m_pkthdr.rcvif));
531 		if (ia == 0)
532 			break;
533 		icp->icmp_type = ICMP_MASKREPLY;
534 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
535 		if (ip->ip_src.s_addr == 0) {
536 			if (ia->ia_ifp->if_flags & IFF_BROADCAST) {
537 				if (ia->ia_broadaddr.sin_addr.s_addr)
538 					ip->ip_src = ia->ia_broadaddr.sin_addr;
539 				else
540 					ip->ip_src.s_addr = INADDR_BROADCAST;
541 			}
542 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
543 				ip->ip_src = ia->ia_dstaddr.sin_addr;
544 		}
545 reflect:
546 #if NCARP > 0
547 		if (m->m_pkthdr.rcvif->if_type == IFT_CARP &&
548 		    carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr,
549 		    &ip->ip_dst.s_addr))
550 			goto freeit;
551 #endif
552 		/* Free packet atttributes */
553 		if (m->m_flags & M_PKTHDR)
554 			m_tag_delete_chain(m);
555 
556 		icmpstat.icps_reflect++;
557 		icmpstat.icps_outhist[icp->icmp_type]++;
558 		if (!icmp_reflect(m, &opts, NULL))
559 			icmp_send(m, opts);
560 		return;
561 
562 	case ICMP_REDIRECT:
563 		/* Free packet atttributes */
564 		if (m->m_flags & M_PKTHDR)
565 			m_tag_delete_chain(m);
566 		if (icmp_rediraccept == 0 || ipforwarding == 1)
567 			goto freeit;
568 		if (code > 3)
569 			goto badcode;
570 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
571 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
572 			icmpstat.icps_badlen++;
573 			break;
574 		}
575 		/*
576 		 * Short circuit routing redirects to force
577 		 * immediate change in the kernel's routing
578 		 * tables.  The message is also handed to anyone
579 		 * listening on a raw socket (e.g. the routing
580 		 * daemon for use in updating its tables).
581 		 */
582 		icmpgw.sin_addr = ip->ip_src;
583 		icmpdst.sin_addr = icp->icmp_gwaddr;
584 #ifdef	ICMPPRINTFS
585 		if (icmpprintfs) {
586 			char buf[4 * sizeof("123")];
587 			strlcpy(buf, inet_ntoa(icp->icmp_ip.ip_dst),
588 			    sizeof buf);
589 
590 			printf("redirect dst %s to %s\n",
591 			    buf, inet_ntoa(icp->icmp_gwaddr));
592 		}
593 #endif
594 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
595 #if NCARP > 0
596 		if (m->m_pkthdr.rcvif->if_type == IFT_CARP &&
597 		    carp_lsdrop(m, AF_INET, &icmpsrc.sin_addr.s_addr,
598 		    &ip->ip_dst.s_addr))
599 			goto freeit;
600 #endif
601 		rt = NULL;
602 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
603 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
604 		    sintosa(&icmpgw), (struct rtentry **)&rt,
605 		    m->m_pkthdr.rdomain);
606 		if (rt != NULL && icmp_redirtimeout != 0) {
607 			(void)rt_timer_add(rt, icmp_redirect_timeout,
608 			    icmp_redirect_timeout_q, m->m_pkthdr.rdomain);
609 		}
610 		if (rt != NULL)
611 			rtfree(rt);
612 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
613 		break;
614 
615 	/*
616 	 * No kernel processing for the following;
617 	 * just fall through to send to raw listener.
618 	 */
619 	case ICMP_ECHOREPLY:
620 	case ICMP_ROUTERADVERT:
621 	case ICMP_ROUTERSOLICIT:
622 	case ICMP_TSTAMPREPLY:
623 	case ICMP_IREQREPLY:
624 	case ICMP_MASKREPLY:
625 	case ICMP_TRACEROUTE:
626 	case ICMP_DATACONVERR:
627 	case ICMP_MOBILE_REDIRECT:
628 	case ICMP_IPV6_WHEREAREYOU:
629 	case ICMP_IPV6_IAMHERE:
630 	case ICMP_MOBILE_REGREQUEST:
631 	case ICMP_MOBILE_REGREPLY:
632 	case ICMP_PHOTURIS:
633 	default:
634 		break;
635 	}
636 
637 raw:
638 	rip_input(m);
639 	return;
640 
641 freeit:
642 	m_freem(m);
643 }
644 
645 /*
646  * Reflect the ip packet back to the source
647  */
648 int
649 icmp_reflect(struct mbuf *m, struct mbuf **op, struct in_ifaddr *ia)
650 {
651 	struct ip *ip = mtod(m, struct ip *);
652 	struct in_addr t;
653 	struct mbuf *opts = 0;
654 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
655 
656 	if (!in_canforward(ip->ip_src) &&
657 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
658 	    htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
659 		m_freem(m);		/* Bad return address */
660 		return (EHOSTUNREACH);
661 	}
662 
663 #if NPF > 0
664 	pf_pkt_addr_changed(m);
665 #endif
666 	t = ip->ip_dst;
667 	ip->ip_dst = ip->ip_src;
668 	/*
669 	 * If the incoming packet was addressed directly to us,
670 	 * use dst as the src for the reply.  For broadcast, use
671 	 * the address which corresponds to the incoming interface.
672 	 */
673 	if (ia == NULL) {
674 		TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
675 			if (ia->ia_ifp->if_rdomain !=
676 			    rtable_l2(m->m_pkthdr.rdomain))
677 				continue;
678 			if (t.s_addr == ia->ia_addr.sin_addr.s_addr)
679 				break;
680 			if ((ia->ia_ifp->if_flags & IFF_BROADCAST) &&
681 			    ia->ia_broadaddr.sin_addr.s_addr != 0 &&
682 			    t.s_addr == ia->ia_broadaddr.sin_addr.s_addr)
683 				break;
684 		}
685 	}
686 	/*
687 	 * The following happens if the packet was not addressed to us.
688 	 * Use the new source address and do a route lookup. If it fails
689 	 * drop the packet as there is no path to the host.
690 	 */
691 	if (ia == NULL) {
692 		struct sockaddr_in *dst;
693 		struct route ro;
694 
695 		bzero((caddr_t) &ro, sizeof(ro));
696 		dst = satosin(&ro.ro_dst);
697 		dst->sin_family = AF_INET;
698 		dst->sin_len = sizeof(*dst);
699 		dst->sin_addr = ip->ip_src;
700 
701 		/* keep packet in the original virtual instance */
702 		ro.ro_rt = rtalloc1(&ro.ro_dst, RT_REPORT,
703 		     m->m_pkthdr.rdomain);
704 		if (ro.ro_rt == 0) {
705 			ipstat.ips_noroute++;
706 			m_freem(m);
707 			return (EHOSTUNREACH);
708 		}
709 
710 		ia = ifatoia(ro.ro_rt->rt_ifa);
711 		ro.ro_rt->rt_use++;
712 		RTFREE(ro.ro_rt);
713 	}
714 
715 	t = ia->ia_addr.sin_addr;
716 	ip->ip_src = t;
717 	ip->ip_ttl = MAXTTL;
718 
719 	if (optlen > 0) {
720 		u_char *cp;
721 		int opt, cnt;
722 		u_int len;
723 
724 		/*
725 		 * Retrieve any source routing from the incoming packet;
726 		 * add on any record-route or timestamp options.
727 		 */
728 		cp = (u_char *) (ip + 1);
729 		if (op && (opts = ip_srcroute()) == 0 &&
730 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
731 			opts->m_len = sizeof(struct in_addr);
732 			mtod(opts, struct in_addr *)->s_addr = 0;
733 		}
734 		if (op && opts) {
735 #ifdef ICMPPRINTFS
736 			if (icmpprintfs)
737 				printf("icmp_reflect optlen %d rt %d => ",
738 				    optlen, opts->m_len);
739 #endif
740 			for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
741 				opt = cp[IPOPT_OPTVAL];
742 				if (opt == IPOPT_EOL)
743 					break;
744 				if (opt == IPOPT_NOP)
745 					len = 1;
746 				else {
747 					if (cnt < IPOPT_OLEN + sizeof(*cp))
748 						break;
749 					len = cp[IPOPT_OLEN];
750 					if (len < IPOPT_OLEN + sizeof(*cp) ||
751 					    len > cnt)
752 						break;
753 				}
754 				/*
755 				 * Should check for overflow, but it
756 				 * "can't happen"
757 				 */
758 				if (opt == IPOPT_RR || opt == IPOPT_TS ||
759 				    opt == IPOPT_SECURITY) {
760 					bcopy((caddr_t)cp,
761 					    mtod(opts, caddr_t) + opts->m_len,
762 					    len);
763 					opts->m_len += len;
764 				}
765 			}
766 			/* Terminate & pad, if necessary */
767 			if ((cnt = opts->m_len % 4) != 0)
768 				for (; cnt < 4; cnt++) {
769 					*(mtod(opts, caddr_t) + opts->m_len) =
770 					    IPOPT_EOL;
771 					opts->m_len++;
772 				}
773 #ifdef ICMPPRINTFS
774 			if (icmpprintfs)
775 				printf("%d\n", opts->m_len);
776 #endif
777 		}
778 		/*
779 		 * Now strip out original options by copying rest of first
780 		 * mbuf's data back, and adjust the IP length.
781 		 */
782 		ip->ip_len = htons(ntohs(ip->ip_len) - optlen);
783 		ip->ip_hl = sizeof(struct ip) >> 2;
784 		m->m_len -= optlen;
785 		if (m->m_flags & M_PKTHDR)
786 			m->m_pkthdr.len -= optlen;
787 		optlen += sizeof(struct ip);
788 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
789 		    m->m_len - sizeof(struct ip));
790 	}
791 	m->m_flags &= ~(M_BCAST|M_MCAST);
792 	if (op)
793 		*op = opts;
794 
795 	return (0);
796 }
797 
798 /*
799  * Send an icmp packet back to the ip level,
800  * after supplying a checksum.
801  */
802 void
803 icmp_send(struct mbuf *m, struct mbuf *opts)
804 {
805 	struct ip *ip = mtod(m, struct ip *);
806 	int hlen;
807 	struct icmp *icp;
808 
809 	hlen = ip->ip_hl << 2;
810 	m->m_data += hlen;
811 	m->m_len -= hlen;
812 	icp = mtod(m, struct icmp *);
813 	icp->icmp_cksum = 0;
814 	icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
815 	m->m_data -= hlen;
816 	m->m_len += hlen;
817 #ifdef ICMPPRINTFS
818 	if (icmpprintfs) {
819 		char buf[4 * sizeof("123")];
820 
821 		strlcpy(buf, inet_ntoa(ip->ip_dst), sizeof buf);
822 		printf("icmp_send dst %s src %s\n",
823 		    buf, inet_ntoa(ip->ip_src));
824 	}
825 #endif
826 	(void)ip_output(m, opts, (void *)NULL, 0, (void *)NULL, (void *)NULL);
827 }
828 
829 n_time
830 iptime(void)
831 {
832 	struct timeval atv;
833 	u_long t;
834 
835 	microtime(&atv);
836 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
837 	return (htonl(t));
838 }
839 
840 int
841 icmp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
842     size_t newlen)
843 {
844 	int s, error;
845 
846 	/* All sysctl names at this level are terminal. */
847 	if (namelen != 1)
848 		return (ENOTDIR);
849 
850 	s = splsoftnet();
851 	switch (name[0]) {
852 	case ICMPCTL_REDIRTIMEOUT:
853 
854 		error = sysctl_int(oldp, oldlenp, newp, newlen,
855 		    &icmp_redirtimeout);
856 		if (icmp_redirect_timeout_q != NULL) {
857 			if (icmp_redirtimeout == 0) {
858 				rt_timer_queue_destroy(icmp_redirect_timeout_q,
859 				    TRUE);
860 				icmp_redirect_timeout_q = NULL;
861 			} else
862 				rt_timer_queue_change(icmp_redirect_timeout_q,
863 				    icmp_redirtimeout);
864 		} else if (icmp_redirtimeout > 0) {
865 			icmp_redirect_timeout_q =
866 			    rt_timer_queue_create(icmp_redirtimeout);
867 		}
868 		break;
869 
870 	case ICMPCTL_STATS:
871 		if (newp != NULL) {
872 			error = EPERM;
873 			break;
874 		}
875 		error = sysctl_struct(oldp, oldlenp, newp, newlen,
876 		    &icmpstat, sizeof(icmpstat));
877 		break;
878 
879 	default:
880 		if (name[0] < ICMPCTL_MAXID) {
881 			error = sysctl_int_arr(icmpctl_vars, name, namelen,
882 			    oldp, oldlenp, newp, newlen);
883 			break;
884 		}
885 		error = ENOPROTOOPT;
886 		break;
887 	}
888 	splx(s);
889 
890 	return (error);
891 }
892 
893 
894 struct rtentry *
895 icmp_mtudisc_clone(struct sockaddr *dst, u_int rtableid)
896 {
897 	struct rtentry *rt;
898 	int error;
899 
900 	rt = rtalloc1(dst, RT_REPORT, rtableid);
901 	if (rt == 0)
902 		return (NULL);
903 
904 	/* Check if the route is actually usable */
905 	if (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE) ||
906 	    (rt->rt_flags & RTF_UP) == 0)
907 		return (NULL);
908 
909 	/* If we didn't get a host route, allocate one */
910 
911 	if ((rt->rt_flags & RTF_HOST) == 0) {
912 		struct rtentry *nrt;
913 		struct rt_addrinfo info;
914 
915 		bzero(&info, sizeof(info));
916 		info.rti_info[RTAX_DST] = dst;
917 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
918 		info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC;
919 
920 		error = rtrequest1(RTM_ADD, &info, RTP_DEFAULT, &nrt, rtableid);
921 		if (error) {
922 			rtfree(rt);
923 			return (NULL);
924 		}
925 		nrt->rt_rmx = rt->rt_rmx;
926 		rtfree(rt);
927 		rt = nrt;
928 	}
929 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q,
930 	    rtableid);
931 	if (error) {
932 		rtfree(rt);
933 		return (NULL);
934 	}
935 
936 	return (rt);
937 }
938 
939 void
940 icmp_mtudisc(struct icmp *icp, u_int rtableid)
941 {
942 	struct rtentry *rt;
943 	struct sockaddr *dst = sintosa(&icmpsrc);
944 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
945 
946 	/* Table of common MTUs: */
947 
948 	static u_short mtu_table[] = {
949 		65535, 65280, 32000, 17914, 9180, 8166,
950 		4352, 2002, 1492, 1006, 508, 296, 68, 0
951 	};
952 
953 	rt = icmp_mtudisc_clone(dst, rtableid);
954 	if (rt == 0)
955 		return;
956 
957 	if (mtu == 0) {
958 		int i = 0;
959 
960 		mtu = ntohs(icp->icmp_ip.ip_len);
961 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
962 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
963 			mtu -= (icp->icmp_ip.ip_hl << 2);
964 
965 		/* If we still can't guess a value, try the route */
966 
967 		if (mtu == 0) {
968 			mtu = rt->rt_rmx.rmx_mtu;
969 
970 			/* If no route mtu, default to the interface mtu */
971 
972 			if (mtu == 0)
973 				mtu = rt->rt_ifp->if_mtu;
974 		}
975 
976 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
977 			if (mtu > mtu_table[i]) {
978 				mtu = mtu_table[i];
979 				break;
980 			}
981 	}
982 
983 	/*
984 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
985 	 *	  to turn off PMTU for a route, and the kernel can
986 	 *	  set it to indicate a serious problem with PMTU
987 	 *	  on a route.  We should be using a separate flag
988 	 *	  for the kernel to indicate this.
989 	 */
990 
991 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
992 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
993 			rt->rt_rmx.rmx_locks |= RTV_MTU;
994 		else if (rt->rt_rmx.rmx_mtu > mtu ||
995 		    rt->rt_rmx.rmx_mtu == 0)
996 			rt->rt_rmx.rmx_mtu = mtu;
997 	}
998 
999 	rtfree(rt);
1000 }
1001 
1002 void
1003 icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
1004 {
1005 	if (rt == NULL)
1006 		panic("icmp_mtudisc_timeout:  bad route to timeout");
1007 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1008 	    (RTF_DYNAMIC | RTF_HOST)) {
1009 		void *(*ctlfunc)(int, struct sockaddr *, u_int, void *);
1010 		extern u_char ip_protox[];
1011 		struct sockaddr_in sa;
1012 		struct rt_addrinfo info;
1013 
1014 		bzero(&info, sizeof(info));
1015 		info.rti_info[RTAX_DST] = rt_key(rt);
1016 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1017 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1018 		info.rti_flags = rt->rt_flags;
1019 
1020 		sa = *(struct sockaddr_in *)rt_key(rt);
1021 		rtrequest1(RTM_DELETE, &info, rt->rt_priority, NULL,
1022 		    r->rtt_tableid);
1023 
1024 		/* Notify TCP layer of increased Path MTU estimate */
1025 		ctlfunc = inetsw[ip_protox[IPPROTO_TCP]].pr_ctlinput;
1026 		if (ctlfunc)
1027 			(*ctlfunc)(PRC_MTUINC,(struct sockaddr *)&sa,
1028 			    r->rtt_tableid, NULL);
1029 	} else
1030 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
1031 			rt->rt_rmx.rmx_mtu = 0;
1032 }
1033 
1034 /*
1035  * Perform rate limit check.
1036  * Returns 0 if it is okay to send the icmp packet.
1037  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1038  * limitation.
1039  *
1040  * XXX per-destination/type check necessary?
1041  */
1042 int
1043 icmp_ratelimit(const struct in_addr *dst, const int type, const int code)
1044 {
1045 
1046 	/* PPS limit */
1047 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1048 	    icmperrppslim))
1049 		return 1;
1050 
1051 	/*okay to send*/
1052 	return 0;
1053 }
1054 
1055 void
1056 icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r)
1057 {
1058 	if (rt == NULL)
1059 		panic("icmp_redirect_timeout:  bad route to timeout");
1060 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1061 	    (RTF_DYNAMIC | RTF_HOST)) {
1062 		struct rt_addrinfo info;
1063 
1064 		bzero(&info, sizeof(info));
1065 		info.rti_info[RTAX_DST] = rt_key(rt);
1066 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1067 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1068 		info.rti_flags = rt->rt_flags;
1069 
1070 		rtrequest1(RTM_DELETE, &info, rt->rt_priority, NULL,
1071 		    r->rtt_tableid);
1072 	}
1073 }
1074 
1075 int
1076 icmp_do_exthdr(struct mbuf *m, u_int16_t class, u_int8_t ctype, void *buf,
1077     size_t len)
1078 {
1079 	struct ip *ip = mtod(m, struct ip *);
1080 	int hlen, off;
1081 	struct mbuf *n;
1082 	struct icmp *icp;
1083 	struct icmp_ext_hdr *ieh;
1084 	struct {
1085 		struct icmp_ext_hdr	ieh;
1086 		struct icmp_ext_obj_hdr	ieo;
1087 	} hdr;
1088 
1089 	hlen = ip->ip_hl << 2;
1090 	icp = (struct icmp *)(mtod(m, caddr_t) + hlen);
1091 	if (icp->icmp_type != ICMP_TIMXCEED && icp->icmp_type != ICMP_UNREACH &&
1092 	    icp->icmp_type != ICMP_PARAMPROB)
1093 		/* exthdr not supported */
1094 		return (0);
1095 
1096 	if (icp->icmp_length != 0)
1097 		/* exthdr already present, giving up */
1098 		return (0);
1099 
1100 	/* the actuall offset starts after the common ICMP header */
1101 	hlen += ICMP_MINLEN;
1102 	/* exthdr must start on a word boundary */
1103 	off = roundup(ntohs(ip->ip_len) - hlen, sizeof(u_int32_t));
1104 	/* ... and at an offset of ICMP_EXT_OFFSET or bigger */
1105 	off = max(off, ICMP_EXT_OFFSET);
1106 	icp->icmp_length = off / sizeof(u_int32_t);
1107 
1108 	bzero(&hdr, sizeof(hdr));
1109 	hdr.ieh.ieh_version = ICMP_EXT_HDR_VERSION;
1110 	hdr.ieo.ieo_length = htons(sizeof(struct icmp_ext_obj_hdr) + len);
1111 	hdr.ieo.ieo_cnum = class;
1112 	hdr.ieo.ieo_ctype = ctype;
1113 
1114 	if (m_copyback(m, hlen + off, sizeof(hdr), &hdr, M_NOWAIT) ||
1115 	    m_copyback(m, hlen + off + sizeof(hdr), len, buf, M_NOWAIT)) {
1116 		m_freem(m);
1117 		return (ENOBUFS);
1118 	}
1119 
1120 	/* calculate checksum */
1121 	n = m_getptr(m, hlen + off, &off);
1122 	if (n == NULL)
1123 		panic("icmp_do_exthdr: m_getptr failure");
1124 	/* this is disgusting, in_cksum() is stupid */
1125 	n->m_data += off;
1126 	n->m_len -= off;
1127 	ieh = mtod(n, struct icmp_ext_hdr *);
1128 	ieh->ieh_cksum = in_cksum(n, sizeof(hdr) + len);
1129 	n->m_data -= off;
1130 	n->m_len += off;
1131 
1132 	ip->ip_len = htons(m->m_pkthdr.len);
1133 
1134 	return (0);
1135 }
1136