1 /* $OpenBSD: ip_icmp.c,v 1.181 2018/11/28 08:15:29 claudio Exp $ */ 2 /* $NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include "carp.h" 72 #include "pf.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/mbuf.h> 77 #include <sys/protosw.h> 78 #include <sys/socket.h> 79 #include <sys/sysctl.h> 80 81 #include <net/if.h> 82 #include <net/if_var.h> 83 #include <net/route.h> 84 85 #include <netinet/in.h> 86 #include <netinet/in_systm.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip.h> 89 #include <netinet/ip_icmp.h> 90 #include <netinet/ip_var.h> 91 #include <netinet/icmp_var.h> 92 93 #if NCARP > 0 94 #include <net/if_types.h> 95 #include <netinet/ip_carp.h> 96 #endif 97 98 #if NPF > 0 99 #include <net/pfvar.h> 100 #endif 101 102 /* 103 * ICMP routines: error generation, receive packet processing, and 104 * routines to turnaround packets back to the originator, and 105 * host table maintenance routines. 
106 */ 107 108 #ifdef ICMPPRINTFS 109 int icmpprintfs = 0; /* Settable from ddb */ 110 #endif 111 112 /* values controllable via sysctl */ 113 int icmpmaskrepl = 0; 114 int icmpbmcastecho = 0; 115 int icmptstamprepl = 1; 116 int icmperrppslim = 100; 117 int icmp_rediraccept = 0; 118 int icmp_redirtimeout = 10 * 60; 119 120 static int icmperrpps_count = 0; 121 static struct timeval icmperrppslim_last; 122 123 static struct rttimer_queue *icmp_redirect_timeout_q = NULL; 124 struct cpumem *icmpcounters; 125 126 int *icmpctl_vars[ICMPCTL_MAXID] = ICMPCTL_VARS; 127 128 void icmp_mtudisc_timeout(struct rtentry *, struct rttimer *); 129 int icmp_ratelimit(const struct in_addr *, const int, const int); 130 void icmp_redirect_timeout(struct rtentry *, struct rttimer *); 131 int icmp_input_if(struct ifnet *, struct mbuf **, int *, int, int); 132 int icmp_sysctl_icmpstat(void *, size_t *, void *); 133 134 void 135 icmp_init(void) 136 { 137 icmpcounters = counters_alloc(icps_ncounters); 138 /* 139 * This is only useful if the user initializes redirtimeout to 140 * something other than zero. 141 */ 142 if (icmp_redirtimeout != 0) { 143 icmp_redirect_timeout_q = 144 rt_timer_queue_create(icmp_redirtimeout); 145 } 146 } 147 148 struct mbuf * 149 icmp_do_error(struct mbuf *n, int type, int code, u_int32_t dest, int destmtu) 150 { 151 struct ip *oip = mtod(n, struct ip *), *nip; 152 unsigned oiplen = oip->ip_hl << 2; 153 struct icmp *icp; 154 struct mbuf *m; 155 unsigned icmplen, mblen; 156 157 #ifdef ICMPPRINTFS 158 if (icmpprintfs) 159 printf("icmp_error(%x, %d, %d)\n", oip, type, code); 160 #endif 161 if (type != ICMP_REDIRECT) 162 icmpstat_inc(icps_error); 163 /* 164 * Don't send error if not the first fragment of message. 165 * Don't error if the old packet protocol was ICMP 166 * error message, only known informational types. 
167 */ 168 if (oip->ip_off & htons(IP_OFFMASK)) 169 goto freeit; 170 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && 171 n->m_len >= oiplen + ICMP_MINLEN && 172 !ICMP_INFOTYPE(((struct icmp *) 173 ((caddr_t)oip + oiplen))->icmp_type)) { 174 icmpstat_inc(icps_oldicmp); 175 goto freeit; 176 } 177 /* Don't send error in response to a multicast or broadcast packet */ 178 if (n->m_flags & (M_BCAST|M_MCAST)) 179 goto freeit; 180 181 /* 182 * First, do a rate limitation check. 183 */ 184 if (icmp_ratelimit(&oip->ip_src, type, code)) { 185 icmpstat_inc(icps_toofreq); 186 goto freeit; 187 } 188 189 /* 190 * Now, formulate icmp message 191 */ 192 icmplen = oiplen + min(8, ntohs(oip->ip_len)); 193 /* 194 * Defend against mbuf chains shorter than oip->ip_len: 195 */ 196 mblen = 0; 197 for (m = n; m && (mblen < icmplen); m = m->m_next) 198 mblen += m->m_len; 199 icmplen = min(mblen, icmplen); 200 201 /* 202 * As we are not required to return everything we have, 203 * we return whatever we can return at ease. 204 * 205 * Note that ICMP datagrams longer than 576 octets are out of spec 206 * according to RFC1812; 207 */ 208 209 KASSERT(ICMP_MINLEN + sizeof (struct ip) <= MCLBYTES); 210 211 if (sizeof (struct ip) + icmplen + ICMP_MINLEN > MCLBYTES) 212 icmplen = MCLBYTES - ICMP_MINLEN - sizeof (struct ip); 213 214 m = m_gethdr(M_DONTWAIT, MT_HEADER); 215 if (m && ((sizeof (struct ip) + icmplen + ICMP_MINLEN + 216 sizeof(long) - 1) &~ (sizeof(long) - 1)) > MHLEN) { 217 MCLGET(m, M_DONTWAIT); 218 if ((m->m_flags & M_EXT) == 0) { 219 m_freem(m); 220 m = NULL; 221 } 222 } 223 if (m == NULL) 224 goto freeit; 225 /* keep in same rtable and preserve other pkthdr bits */ 226 m->m_pkthdr.ph_rtableid = n->m_pkthdr.ph_rtableid; 227 m->m_pkthdr.ph_ifidx = n->m_pkthdr.ph_ifidx; 228 /* move PF_GENERATED to new packet, if existent XXX preserve more? 
*/ 229 if (n->m_pkthdr.pf.flags & PF_TAG_GENERATED) 230 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 231 m->m_pkthdr.len = m->m_len = icmplen + ICMP_MINLEN; 232 m_align(m, m->m_len); 233 icp = mtod(m, struct icmp *); 234 if ((u_int)type > ICMP_MAXTYPE) 235 panic("icmp_error"); 236 icmpstat_inc(icps_outhist + type); 237 icp->icmp_type = type; 238 if (type == ICMP_REDIRECT) 239 icp->icmp_gwaddr.s_addr = dest; 240 else { 241 icp->icmp_void = 0; 242 /* 243 * The following assignments assume an overlay with the 244 * zeroed icmp_void field. 245 */ 246 if (type == ICMP_PARAMPROB) { 247 icp->icmp_pptr = code; 248 code = 0; 249 } else if (type == ICMP_UNREACH && 250 code == ICMP_UNREACH_NEEDFRAG && destmtu) 251 icp->icmp_nextmtu = htons(destmtu); 252 } 253 254 icp->icmp_code = code; 255 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip); 256 257 /* 258 * Now, copy old ip header (without options) 259 * in front of icmp message. 260 */ 261 m = m_prepend(m, sizeof(struct ip), M_DONTWAIT); 262 if (m == NULL) 263 goto freeit; 264 nip = mtod(m, struct ip *); 265 /* ip_v set in ip_output */ 266 nip->ip_hl = sizeof(struct ip) >> 2; 267 nip->ip_tos = 0; 268 nip->ip_len = htons(m->m_len); 269 /* ip_id set in ip_output */ 270 nip->ip_off = 0; 271 /* ip_ttl set in icmp_reflect */ 272 nip->ip_p = IPPROTO_ICMP; 273 nip->ip_src = oip->ip_src; 274 nip->ip_dst = oip->ip_dst; 275 276 m_freem(n); 277 return (m); 278 279 freeit: 280 m_freem(n); 281 return (NULL); 282 } 283 284 /* 285 * Generate an error packet of type error 286 * in response to bad packet ip. 287 * 288 * The ip packet inside has ip_off and ip_len in host byte order. 289 */ 290 void 291 icmp_error(struct mbuf *n, int type, int code, u_int32_t dest, int destmtu) 292 { 293 struct mbuf *m; 294 295 m = icmp_do_error(n, type, code, dest, destmtu); 296 if (m != NULL) 297 if (!icmp_reflect(m, NULL, NULL)) 298 icmp_send(m, NULL); 299 } 300 301 /* 302 * Process a received ICMP message. 
*/
/*
 * Entry point: look up the receiving interface from the packet header
 * index and hand the packet to icmp_input_if().  If the interface can
 * no longer be found the packet is freed and IPPROTO_DONE is returned.
 * The interface reference is released before returning.
 */
int
icmp_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct ifnet *ifp;

	ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
	if (ifp == NULL) {
		m_freemp(mp);
		return IPPROTO_DONE;
	}

	proto = icmp_input_if(ifp, mp, offp, proto, af);
	if_put(ifp);
	return proto;
}

/*
 * Validate a received ICMP message (minimum length, checksum) and
 * dispatch on its type:
 *  - UNREACH, TIMXCEED, PARAMPROB and SOURCEQUENCH are translated to a
 *    PRC_* code and passed to the pr_ctlinput handler of the protocol
 *    of the quoted packet;
 *  - ECHO, TSTAMP and MASKREQ are rewritten into their reply type and
 *    sent back through icmp_reflect()/icmp_send();
 *  - REDIRECT updates the routing table through rtredirect().
 *
 * Every path that leaves the switch with "break" (and any type above
 * ICMP_MAXTYPE) also hands the packet to rip_input().
 *
 * Returns IPPROTO_DONE when the packet was consumed here (reflected or
 * freed), otherwise whatever rip_input() returns.  *mp is updated when
 * m_pullup() replaces the mbuf.
 */
int
icmp_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto, int af)
{
	struct mbuf *m = *mp;
	int hlen = *offp;
	struct icmp *icp;
	struct ip *ip = mtod(m, struct ip *);
	struct sockaddr_in sin;
	int icmplen, i, code;
	struct in_ifaddr *ia;
	void (*ctlfunc)(int, struct sockaddr *, u_int, void *);
	struct mbuf *opts;

	/*
	 * Locate icmp structure in mbuf, and check
	 * that not corrupted and of at least minimum length.
	 */
	icmplen = ntohs(ip->ip_len) - hlen;
#ifdef ICMPPRINTFS
	if (icmpprintfs) {
		char dst[INET_ADDRSTRLEN], src[INET_ADDRSTRLEN];

		inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst));
		inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src));

		printf("icmp_input from %s to %s, len %d\n", src, dst, icmplen);
	}
#endif
	if (icmplen < ICMP_MINLEN) {
		icmpstat_inc(icps_tooshort);
		goto freeit;
	}
	/* make the IP header and up to ICMP_ADVLENMAX bytes contiguous */
	i = hlen + min(icmplen, ICMP_ADVLENMAX);
	if ((m = *mp = m_pullup(m, i)) == NULL) {
		icmpstat_inc(icps_tooshort);
		return IPPROTO_DONE;
	}
	/* m_pullup() may have returned a different mbuf; reload ip */
	ip = mtod(m, struct ip *);
	if (in4_cksum(m, 0, hlen, icmplen)) {
		icmpstat_inc(icps_checksum);
		goto freeit;
	}

	icp = (struct icmp *)(mtod(m, caddr_t) + hlen);
#ifdef ICMPPRINTFS
	/*
	 * Message type specific processing.
	 */
	if (icmpprintfs)
		printf("icmp_input, type %d code %d\n", icp->icmp_type,
		    icp->icmp_code);
#endif
	/* unknown types are only offered to raw sockets */
	if (icp->icmp_type > ICMP_MAXTYPE)
		goto raw;
#if NPF > 0
	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
		switch (icp->icmp_type) {
		/*
		 * As pf_icmp_mapping() considers redirects belonging to a
		 * diverted connection, we must include it here.
		 */
		case ICMP_REDIRECT:
			/* FALLTHROUGH */
		/*
		 * These ICMP types map to other connections.  They must be
		 * delivered to pr_ctlinput() also for diverted connections.
		 */
		case ICMP_UNREACH:
		case ICMP_TIMXCEED:
		case ICMP_PARAMPROB:
		case ICMP_SOURCEQUENCH:
			/*
			 * Do not use the divert-to property of the TCP or UDP
			 * rule when doing the PCB lookup for the raw socket.
			 */
			m->m_pkthdr.pf.flags &=~ PF_TAG_DIVERTED;
			break;
		default:
			goto raw;
		}
	}
#endif /* NPF */
	icmpstat_inc(icps_inhist + icp->icmp_type);
	code = icp->icmp_code;
	switch (icp->icmp_type) {

	case ICMP_UNREACH:
		/* translate the ICMP code into a PRC_* code */
		switch (code) {
		case ICMP_UNREACH_NET:
		case ICMP_UNREACH_HOST:
		case ICMP_UNREACH_PROTOCOL:
		case ICMP_UNREACH_PORT:
		case ICMP_UNREACH_SRCFAIL:
			code += PRC_UNREACH_NET;
			break;

		case ICMP_UNREACH_NEEDFRAG:
			code = PRC_MSGSIZE;
			break;

		case ICMP_UNREACH_NET_UNKNOWN:
		case ICMP_UNREACH_NET_PROHIB:
		case ICMP_UNREACH_TOSNET:
			code = PRC_UNREACH_NET;
			break;

		case ICMP_UNREACH_HOST_UNKNOWN:
		case ICMP_UNREACH_ISOLATED:
		case ICMP_UNREACH_HOST_PROHIB:
		case ICMP_UNREACH_TOSHOST:
		case ICMP_UNREACH_FILTER_PROHIB:
		case ICMP_UNREACH_HOST_PRECEDENCE:
		case ICMP_UNREACH_PRECEDENCE_CUTOFF:
			code = PRC_UNREACH_HOST;
			break;

		default:
			goto badcode;
		}
		goto deliver;

	case ICMP_TIMXCEED:
		if (code > 1)
			goto badcode;
		code += PRC_TIMXCEED_INTRANS;
		goto deliver;

	case ICMP_PARAMPROB:
		if (code > 1)
			goto badcode;
		code = PRC_PARAMPROB;
		goto deliver;

	case ICMP_SOURCEQUENCH:
		if (code)
			goto badcode;
		code = PRC_QUENCH;
	deliver:
		/*
		 * Problem with datagram; advise higher level routines.
		 */
		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
			icmpstat_inc(icps_badlen);
			goto freeit;
		}
		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
			goto badcode;
#ifdef INET6
		/* Get more contiguous data for a v6 in v4 ICMP message. */
		if (icp->icmp_ip.ip_p == IPPROTO_IPV6) {
			if (icmplen < ICMP_V6ADVLENMIN ||
			    icmplen < ICMP_V6ADVLEN(icp)) {
				icmpstat_inc(icps_badlen);
				goto freeit;
			}
		}
#endif /* INET6 */
#ifdef ICMPPRINTFS
		if (icmpprintfs)
			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
#endif
		/* the handler is told the destination of the quoted packet */
		memset(&sin, 0, sizeof(sin));
		sin.sin_family = AF_INET;
		sin.sin_len = sizeof(struct sockaddr_in);
		sin.sin_addr = icp->icmp_ip.ip_dst;
#if NCARP > 0
		if (carp_lsdrop(ifp, m, AF_INET, &sin.sin_addr.s_addr,
		    &ip->ip_dst.s_addr, 1))
			goto freeit;
#endif
		/*
		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
		 * notification to TCP layer.
		 */
		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
		if (ctlfunc)
			(*ctlfunc)(code, sintosa(&sin), m->m_pkthdr.ph_rtableid,
			    &icp->icmp_ip);
		break;

	badcode:
		icmpstat_inc(icps_badcode);
		break;

	case ICMP_ECHO:
		if (!icmpbmcastecho &&
		    (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
			icmpstat_inc(icps_bmcastecho);
			break;
		}
		icp->icmp_type = ICMP_ECHOREPLY;
		goto reflect;

	case ICMP_TSTAMP:
		if (icmptstamprepl == 0)
			break;

		if (!icmpbmcastecho &&
		    (m->m_flags & (M_MCAST | M_BCAST)) != 0) {
			icmpstat_inc(icps_bmcastecho);
			break;
		}
		if (icmplen < ICMP_TSLEN) {
			icmpstat_inc(icps_badlen);
			break;
		}
		icp->icmp_type = ICMP_TSTAMPREPLY;
		icp->icmp_rtime = iptime();
		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
		goto reflect;

	case ICMP_MASKREQ:
		if (icmpmaskrepl == 0)
			break;
		if (icmplen < ICMP_MASKLEN) {
			icmpstat_inc(icps_badlen);
			break;
		}
		/*
		 * We are not able to respond with all ones broadcast
		 * unless we receive it over a point-to-point interface.
		 */
		memset(&sin, 0, sizeof(sin));
		sin.sin_family = AF_INET;
		sin.sin_len = sizeof(struct sockaddr_in);
		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
		    ip->ip_dst.s_addr == INADDR_ANY)
			sin.sin_addr = ip->ip_src;
		else
			sin.sin_addr = ip->ip_dst;
		if (ifp == NULL)
			break;
		/* find the address on the receiving interface to answer for */
		ia = ifatoia(ifaof_ifpforaddr(sintosa(&sin), ifp));
		if (ia == NULL)
			break;
		icp->icmp_type = ICMP_MASKREPLY;
		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
		/* requester has no address yet: pick a usable reply target */
		if (ip->ip_src.s_addr == 0) {
			if (ifp->if_flags & IFF_BROADCAST) {
				if (ia->ia_broadaddr.sin_addr.s_addr)
					ip->ip_src = ia->ia_broadaddr.sin_addr;
				else
					ip->ip_src.s_addr = INADDR_BROADCAST;
			}
			else if (ifp->if_flags & IFF_POINTOPOINT)
				ip->ip_src = ia->ia_dstaddr.sin_addr;
		}
	reflect:
#if NCARP > 0
		if (carp_lsdrop(ifp, m, AF_INET, &ip->ip_src.s_addr,
		    &ip->ip_dst.s_addr, 1))
			goto freeit;
#endif
		icmpstat_inc(icps_reflect);
		icmpstat_inc(icps_outhist + icp->icmp_type);
		/* on failure icmp_reflect() has already freed m */
		if (!icmp_reflect(m, &opts, NULL)) {
			icmp_send(m, opts);
			m_free(opts);
		}
		return IPPROTO_DONE;

	case ICMP_REDIRECT:
	{
		struct sockaddr_in sdst;
		struct sockaddr_in sgw;
		struct sockaddr_in ssrc;
		struct rtentry *newrt = NULL;

		/* redirects are ignored unless accepted and not forwarding */
		if (icmp_rediraccept == 0 || ipforwarding == 1)
			goto freeit;
		if (code > 3)
			goto badcode;
		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
			icmpstat_inc(icps_badlen);
			break;
		}
		/*
		 * Short circuit routing redirects to force
		 * immediate change in the kernel's routing
		 * tables.  The message is also handed to anyone
		 * listening on a raw socket (e.g. the routing
		 * daemon for use in updating its tables).
		 */
		memset(&sdst, 0, sizeof(sdst));
		memset(&sgw, 0, sizeof(sgw));
		memset(&ssrc, 0, sizeof(ssrc));
		sdst.sin_family = sgw.sin_family = ssrc.sin_family = AF_INET;
		sdst.sin_len = sgw.sin_len = ssrc.sin_len = sizeof(sdst);
		memcpy(&sdst.sin_addr, &icp->icmp_ip.ip_dst,
		    sizeof(sdst.sin_addr));
		memcpy(&sgw.sin_addr, &icp->icmp_gwaddr,
		    sizeof(sgw.sin_addr));
		memcpy(&ssrc.sin_addr, &ip->ip_src,
		    sizeof(ssrc.sin_addr));

#ifdef ICMPPRINTFS
		if (icmpprintfs) {
			char gw[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN];

			inet_ntop(AF_INET, &icp->icmp_gwaddr, gw, sizeof(gw));
			inet_ntop(AF_INET, &icp->icmp_ip.ip_dst,
			    dst, sizeof(dst));

			printf("redirect dst %s to %s\n", dst, gw);
		}
#endif

#if NCARP > 0
		if (carp_lsdrop(ifp, m, AF_INET, &sdst.sin_addr.s_addr,
		    &ip->ip_dst.s_addr, 1))
			goto freeit;
#endif
		rtredirect(sintosa(&sdst), sintosa(&sgw),
		    sintosa(&ssrc), &newrt, m->m_pkthdr.ph_rtableid);
		/* arm the expiry timer for the route the redirect created */
		if (newrt != NULL && icmp_redirtimeout != 0) {
			(void)rt_timer_add(newrt, icmp_redirect_timeout,
			    icmp_redirect_timeout_q, m->m_pkthdr.ph_rtableid);
		}
		if (newrt != NULL)
			rtfree(newrt);
		pfctlinput(PRC_REDIRECT_HOST, sintosa(&sdst));
		break;
	}
	/*
	 * No kernel processing for the following;
	 * just fall through to send to raw listener.
	 */
	case ICMP_ECHOREPLY:
	case ICMP_ROUTERADVERT:
	case ICMP_ROUTERSOLICIT:
	case ICMP_TSTAMPREPLY:
	case ICMP_IREQREPLY:
	case ICMP_MASKREPLY:
	case ICMP_TRACEROUTE:
	case ICMP_DATACONVERR:
	case ICMP_MOBILE_REDIRECT:
	case ICMP_IPV6_WHEREAREYOU:
	case ICMP_IPV6_IAMHERE:
	case ICMP_MOBILE_REGREQUEST:
	case ICMP_MOBILE_REGREPLY:
	case ICMP_PHOTURIS:
	default:
		break;
	}

raw:
	return rip_input(mp, offp, proto, af);

freeit:
	m_freem(m);
	return IPPROTO_DONE;
}

/*
 * Reflect the ip packet back to the source
 *
 * The IP header of m is rewritten in place: the old source becomes the
 * destination and a local address is chosen as the new source (from ia
 * when the caller supplies one, otherwise via a route lookup).  IP
 * options are stripped from m.
 *
 * Returns 0 on success.  On failure m has been freed and EHOSTUNREACH
 * or ELOOP is returned.  When op is non-NULL and 0 is returned, *op is
 * set to an mbuf holding the options to put back on the reply, or NULL
 * if there are none.
 */
int
icmp_reflect(struct mbuf *m, struct mbuf **op, struct in_ifaddr *ia)
{
	struct ip *ip = mtod(m, struct ip *);
	struct mbuf *opts = NULL;
	struct sockaddr_in sin;
	struct rtentry *rt = NULL;
	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
	u_int rtableid;

	/* only loopback-net sources are exempt from the in_canforward() test */
	if (!in_canforward(ip->ip_src) &&
	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
	    htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
		m_freem(m);		/* Bad return address */
		return (EHOSTUNREACH);
	}

	if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) {
		m_freem(m);
		return (ELOOP);
	}
	/* reset the packet header but keep the routing table id */
	rtableid = m->m_pkthdr.ph_rtableid;
	m_resethdr(m);
	m->m_pkthdr.ph_rtableid = rtableid;

	/*
	 * If the incoming packet was addressed directly to us,
	 * use dst as the src for the reply.  For broadcast, use
	 * the address which corresponds to the incoming interface.
	 */
	if (ia == NULL) {
		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr = ip->ip_dst;

		rt = rtalloc(sintosa(&sin), 0, rtableid);
		if (rtisvalid(rt) &&
		    ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST))
			ia = ifatoia(rt->rt_ifa);
	}

	/*
	 * The following happens if the packet was not addressed to us.
	 * Use the new source address and do a route lookup. If it fails
	 * drop the packet as there is no path to the host.
	 */
	if (ia == NULL) {
		/* drop the reference from the first lookup, if any */
		rtfree(rt);

		memset(&sin, 0, sizeof(sin));
		sin.sin_len = sizeof(sin);
		sin.sin_family = AF_INET;
		sin.sin_addr = ip->ip_src;

		/* keep packet in the original virtual instance */
		rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid);
		if (rt == NULL) {
			ipstat_inc(ips_noroute);
			m_freem(m);
			return (EHOSTUNREACH);
		}

		ia = ifatoia(rt->rt_ifa);
	}

	ip->ip_dst = ip->ip_src;
	ip->ip_ttl = MAXTTL;

	/* It is safe to dereference ``ia'' iff ``rt'' is valid. */
	ip->ip_src = ia->ia_addr.sin_addr;
	rtfree(rt);

	if (optlen > 0) {
		u_char *cp;
		int opt, cnt;
		u_int len;

		/*
		 * Retrieve any source routing from the incoming packet;
		 * add on any record-route or timestamp options.
		 */
		cp = (u_char *) (ip + 1);
		/*
		 * No saved source route: start the option mbuf with a
		 * single zeroed in_addr.
		 */
		if (op && (opts = ip_srcroute(m)) == NULL &&
		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
			opts->m_len = sizeof(struct in_addr);
			mtod(opts, struct in_addr *)->s_addr = 0;
		}
		if (op && opts) {
#ifdef ICMPPRINTFS
			if (icmpprintfs)
				printf("icmp_reflect optlen %d rt %d => ",
				    optlen, opts->m_len);
#endif
			/* walk the received options; stop on malformed input */
			for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
				opt = cp[IPOPT_OPTVAL];
				if (opt == IPOPT_EOL)
					break;
				if (opt == IPOPT_NOP)
					len = 1;
				else {
					if (cnt < IPOPT_OLEN + sizeof(*cp))
						break;
					len = cp[IPOPT_OLEN];
					if (len < IPOPT_OLEN + sizeof(*cp) ||
					    len > cnt)
						break;
				}
				/*
				 * Should check for overflow, but it
				 * "can't happen"
				 */
				if (opt == IPOPT_RR || opt == IPOPT_TS ||
				    opt == IPOPT_SECURITY) {
					memcpy(mtod(opts, caddr_t) +
					    opts->m_len, cp, len);
					opts->m_len += len;
				}
			}
			/* Terminate & pad, if necessary */
			if ((cnt = opts->m_len % 4) != 0)
				for (; cnt < 4; cnt++) {
					*(mtod(opts, caddr_t) + opts->m_len) =
					    IPOPT_EOL;
					opts->m_len++;
				}
#ifdef ICMPPRINTFS
			if (icmpprintfs)
				printf("%d\n", opts->m_len);
#endif
		}
		ip_stripoptions(m);
	}
	m->m_flags &= ~(M_BCAST|M_MCAST);
	if (op)
		*op = opts;

	return (0);
}

/*
 * Send an icmp packet back to the ip level
 *
 * Clears the ICMP checksum field and flags the packet with
 * M_ICMP_CSUM_OUT, inserts opts (if any) into the IP header, then
 * passes the packet to ip_send().
 */
void
icmp_send(struct mbuf *m, struct mbuf *opts)
{
	struct ip *ip = mtod(m, struct ip *);
	int hlen;
	struct icmp *icp;

	hlen = ip->ip_hl << 2;
	icp = (struct icmp *)(mtod(m, caddr_t) + hlen);
	icp->icmp_cksum = 0;
	m->m_pkthdr.csum_flags = M_ICMP_CSUM_OUT;
#ifdef ICMPPRINTFS
	if (icmpprintfs) {
		char dst[INET_ADDRSTRLEN], src[INET_ADDRSTRLEN];

		inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst));
		inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src));

		printf("icmp_send dst %s src %s\n", dst, src);
	}
#endif
	if (opts != NULL)
		m = ip_insertoptions(m, opts, &hlen);

	ip_send(m);
}

/*
 * Return the number of milliseconds elapsed in the current 24 hour
 * period of the clock read by microtime(), in network byte order.
 */
u_int32_t
iptime(void)
{
	struct timeval atv;
	u_long t;

	microtime(&atv);
	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
	return (htonl(t));
}

/*
 * sysctl handler for the ICMP variables.
 *
 * ICMPCTL_REDIRTIMEOUT also creates, retimes or destroys the redirect
 * timer queue so that it matches the resulting icmp_redirtimeout.
 * ICMPCTL_STATS exports the statistics.  Any other name below
 * ICMPCTL_MAXID is looked up in icmpctl_vars.
 *
 * Returns 0 or an errno value (ENOTDIR for a non-terminal name,
 * ENOPROTOOPT for an unknown one).
 */
int
icmp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int error;

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case ICMPCTL_REDIRTIMEOUT:

		NET_LOCK();
		error = sysctl_int(oldp, oldlenp, newp, newlen,
		    &icmp_redirtimeout);
		/* NOTE(review): the queue is adjusted even when error != 0 */
		if (icmp_redirect_timeout_q != NULL) {
			if (icmp_redirtimeout == 0) {
				rt_timer_queue_destroy(icmp_redirect_timeout_q);
				icmp_redirect_timeout_q = NULL;
			} else
				rt_timer_queue_change(icmp_redirect_timeout_q,
				    icmp_redirtimeout);
		} else if (icmp_redirtimeout > 0) {
			icmp_redirect_timeout_q =
			    rt_timer_queue_create(icmp_redirtimeout);
		}
		NET_UNLOCK();
		break;

	case ICMPCTL_STATS:
		error = icmp_sysctl_icmpstat(oldp, oldlenp, newp);
		break;

	default:
		if (name[0] < ICMPCTL_MAXID) {
			NET_LOCK();
			error = sysctl_int_arr(icmpctl_vars, name, namelen,
			    oldp, oldlenp, newp, newlen);
			NET_UNLOCK();
			break;
		}
		error = ENOPROTOOPT;
		break;
	}

	return (error);
}

/*
 * Export the ICMP statistics: read icmpcounters into a local array,
 * copy the values word by word into a struct icmpstat and return it
 * through sysctl_rdstruct().
 */
int
icmp_sysctl_icmpstat(void *oldp, size_t *oldlenp, void *newp)
{
	uint64_t counters[icps_ncounters];
	struct icmpstat icmpstat;
	u_long *words = (u_long *)&icmpstat;
	int i;

	/* struct icmpstat must be exactly one u_long per counter */
	CTASSERT(sizeof(icmpstat) == (nitems(counters) * sizeof(u_long)));
	memset(&icmpstat, 0, sizeof icmpstat);
	counters_read(icmpcounters, counters, nitems(counters));

	for (i = 0; i < nitems(counters); i++)
		words[i] = (u_long)counters[i];

	return (sysctl_rdstruct(oldp, oldlenp, newp,
	    &icmpstat, sizeof(icmpstat)));
}

/*
 * Find, or create, a host route to dst in rtableid on which a path MTU
 * can be recorded, and arm icmp_mtudisc_timeout() for it on
 * ip_mtudisc_timeout_q.
 *
 * Returns the route with a reference held by the caller (release with
 * rtfree()), or NULL when no usable route exists or the host route or
 * its timer could not be set up.
 */
struct rtentry *
icmp_mtudisc_clone(struct in_addr dst, u_int rtableid)
{
	struct sockaddr_in sin;
	struct rtentry *rt;
	int error;

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_len = sizeof(sin);
	sin.sin_addr = dst;

	rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid);

	/* Check if the route is actually usable */
	if (!rtisvalid(rt) || (rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)))
		goto bad;

	/*
	 * No PMTU for local routes and permanent neighbors,
	 * ARP and NDP use the same expire timer as the route.
	 */
	if (ISSET(rt->rt_flags, RTF_LOCAL) ||
	    (ISSET(rt->rt_flags, RTF_LLINFO) && rt->rt_expire == 0))
		goto bad;

	/* If we didn't get a host route, allocate one */
	if ((rt->rt_flags & RTF_HOST) == 0) {
		struct rtentry *nrt;
		struct rt_addrinfo info;
		struct sockaddr_rtlabel sa_rl;

		/* new route inherits ifa, gateway and label of the parent */
		memset(&info, 0, sizeof(info));
		info.rti_ifa = rt->rt_ifa;
		info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC;
		info.rti_info[RTAX_DST] = sintosa(&sin);
		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
		info.rti_info[RTAX_LABEL] =
		    rtlabel_id2sa(rt->rt_labelid, &sa_rl);

		error = rtrequest(RTM_ADD, &info, rt->rt_priority, &nrt,
		    rtableid);
		if (error)
			goto bad;
		nrt->rt_rmx = rt->rt_rmx;
		/* swap the parent for the new host route */
		rtfree(rt);
		rt = nrt;
		rtm_send(rt, RTM_ADD, 0, rtableid);
	}
	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q,
	    rtableid);
	if (error)
		goto bad;

	return (rt);
bad:
	rtfree(rt);
	return (NULL);
}

/*
 * Table of common MTUs, in descending order and terminated by 0; used
 * by icmp_mtudisc() to guess the next lower MTU.
 */
static const u_short mtu_table[] = {
	65535, 65280, 32000, 17914, 9180, 8166,
	4352, 2002, 1492, 1006, 508, 296, 68, 0
};

/*
 * Record a new path MTU from the message icp on the host route to the
 * destination of the quoted packet.
 *
 * When the message carries no next-hop MTU, a value is guessed from
 * the quoted packet length (falling back to the route or interface
 * MTU) and replaced by the first mtu_table entry below it.
 */
void
icmp_mtudisc(struct icmp *icp, u_int rtableid)
{
	struct rtentry *rt;
	struct ifnet *ifp;
	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */

	rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid);
	if (rt == NULL)
		return;

	ifp = if_get(rt->rt_ifidx);
	if (ifp == NULL) {
		rtfree(rt);
		return;
	}

	if (mtu == 0) {
		int i = 0;

		mtu = ntohs(icp->icmp_ip.ip_len);
		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
		if (mtu > rt->rt_mtu && rt->rt_mtu != 0)
			mtu -= (icp->icmp_ip.ip_hl << 2);

		/* If we still can't guess a value, try the route */
		if (mtu == 0) {
			mtu = rt->rt_mtu;

			/* If no route mtu, default to the interface mtu */

			if (mtu == 0)
				mtu = ifp->if_mtu;
		}

		/* step down to the first table entry below the guess */
		for (i = 0; i < nitems(mtu_table); i++)
			if (mtu > mtu_table[i]) {
				mtu = mtu_table[i];
				break;
			}
	}

	/*
	 * XXX:   RTV_MTU is overloaded, since the admin can set it
	 *	  to turn off PMTU for a route, and the kernel can
	 *	  set it to indicate a serious problem with PMTU
	 *	  on a route.  We should be using a separate flag
	 *	  for the kernel to indicate this.
	 */
	if ((rt->rt_locks & RTV_MTU) == 0) {
		/* implausible value: lock the MTU instead of applying it */
		if (mtu < 296 || mtu > ifp->if_mtu)
			rt->rt_locks |= RTV_MTU;
		else if (rt->rt_mtu > mtu || rt->rt_mtu == 0)
			rt->rt_mtu = mtu;
	}

	if_put(ifp);
	rtfree(rt);
}

/*
 * Timer callback armed by icmp_mtudisc_clone().
 *
 * A dynamic host route is deleted and the TCP pr_ctlinput handler is
 * notified with PRC_MTUINC; on any other route the MTU is reset to 0
 * unless it is locked.  Does nothing if the route's interface can no
 * longer be found.
 */
void
icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
{
	struct ifnet *ifp;

	NET_ASSERT_LOCKED();

	ifp = if_get(rt->rt_ifidx);
	if (ifp == NULL)
		return;

	if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
		void (*ctlfunc)(int, struct sockaddr *, u_int, void *);
		struct sockaddr_in sin;

		/* copy the key first: the route is deleted just below */
		sin = *satosin(rt_key(rt));

		rtdeletemsg(rt, ifp, r->rtt_tableid);

		/* Notify TCP layer of increased Path MTU estimate */
		ctlfunc = inetsw[ip_protox[IPPROTO_TCP]].pr_ctlinput;
		if (ctlfunc)
			(*ctlfunc)(PRC_MTUINC, sintosa(&sin),
			    r->rtt_tableid, NULL);
	} else {
		if ((rt->rt_locks & RTV_MTU) == 0)
			rt->rt_mtu = 0;
	}

	if_put(ifp);
}

/*
 * Perform rate limit check.
 * Returns 0 if it is okay to send the icmp packet.
 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
 * limitation.
 *
 * XXX per-destination/type check necessary?
1092 */ 1093 int 1094 icmp_ratelimit(const struct in_addr *dst, const int type, const int code) 1095 { 1096 /* PPS limit */ 1097 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count, 1098 icmperrppslim)) 1099 return 1; /* The packet is subject to rate limit */ 1100 return 0; /* okay to send */ 1101 } 1102 1103 void 1104 icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r) 1105 { 1106 struct ifnet *ifp; 1107 1108 NET_ASSERT_LOCKED(); 1109 1110 ifp = if_get(rt->rt_ifidx); 1111 if (ifp == NULL) 1112 return; 1113 1114 if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) { 1115 rtdeletemsg(rt, ifp, r->rtt_tableid); 1116 } 1117 1118 if_put(ifp); 1119 } 1120 1121 int 1122 icmp_do_exthdr(struct mbuf *m, u_int16_t class, u_int8_t ctype, void *buf, 1123 size_t len) 1124 { 1125 struct ip *ip = mtod(m, struct ip *); 1126 int hlen, off; 1127 struct mbuf *n; 1128 struct icmp *icp; 1129 struct icmp_ext_hdr *ieh; 1130 struct { 1131 struct icmp_ext_hdr ieh; 1132 struct icmp_ext_obj_hdr ieo; 1133 } hdr; 1134 1135 hlen = ip->ip_hl << 2; 1136 icp = (struct icmp *)(mtod(m, caddr_t) + hlen); 1137 if (icp->icmp_type != ICMP_TIMXCEED && icp->icmp_type != ICMP_UNREACH && 1138 icp->icmp_type != ICMP_PARAMPROB) 1139 /* exthdr not supported */ 1140 return (0); 1141 1142 if (icp->icmp_length != 0) 1143 /* exthdr already present, giving up */ 1144 return (0); 1145 1146 /* the actual offset starts after the common ICMP header */ 1147 hlen += ICMP_MINLEN; 1148 /* exthdr must start on a word boundary */ 1149 off = roundup(ntohs(ip->ip_len) - hlen, sizeof(u_int32_t)); 1150 /* ... 
and at an offset of ICMP_EXT_OFFSET or bigger */ 1151 off = max(off, ICMP_EXT_OFFSET); 1152 icp->icmp_length = off / sizeof(u_int32_t); 1153 1154 memset(&hdr, 0, sizeof(hdr)); 1155 hdr.ieh.ieh_version = ICMP_EXT_HDR_VERSION; 1156 hdr.ieo.ieo_length = htons(sizeof(struct icmp_ext_obj_hdr) + len); 1157 hdr.ieo.ieo_cnum = class; 1158 hdr.ieo.ieo_ctype = ctype; 1159 1160 if (m_copyback(m, hlen + off, sizeof(hdr), &hdr, M_NOWAIT) || 1161 m_copyback(m, hlen + off + sizeof(hdr), len, buf, M_NOWAIT)) { 1162 m_freem(m); 1163 return (ENOBUFS); 1164 } 1165 1166 /* calculate checksum */ 1167 n = m_getptr(m, hlen + off, &off); 1168 if (n == NULL) 1169 panic("icmp_do_exthdr: m_getptr failure"); 1170 ieh = (struct icmp_ext_hdr *)(mtod(n, caddr_t) + off); 1171 ieh->ieh_cksum = in4_cksum(n, 0, off, sizeof(hdr) + len); 1172 1173 ip->ip_len = htons(m->m_pkthdr.len); 1174 1175 return (0); 1176 } 1177