1 /* $OpenBSD: ip_icmp.c,v 1.187 2021/07/26 20:44:44 bluhm Exp $ */ 2 /* $NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include "carp.h" 72 #include "pf.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/mbuf.h> 77 #include <sys/protosw.h> 78 #include <sys/socket.h> 79 #include <sys/sysctl.h> 80 81 #include <net/if.h> 82 #include <net/if_var.h> 83 #include <net/route.h> 84 85 #include <netinet/in.h> 86 #include <netinet/in_systm.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip.h> 89 #include <netinet/ip_icmp.h> 90 #include <netinet/ip_var.h> 91 #include <netinet/icmp_var.h> 92 93 #if NCARP > 0 94 #include <net/if_types.h> 95 #include <netinet/ip_carp.h> 96 #endif 97 98 #if NPF > 0 99 #include <net/pfvar.h> 100 #endif 101 102 /* 103 * ICMP routines: error generation, receive packet processing, and 104 * routines to turnaround packets back to the originator, and 105 * host table maintenance routines. 106 */ 107 108 #ifdef ICMPPRINTFS 109 int icmpprintfs = 0; /* Settable from ddb */ 110 #endif 111 112 /* values controllable via sysctl */ 113 int icmpmaskrepl = 0; 114 int icmpbmcastecho = 0; 115 int icmptstamprepl = 1; 116 int icmperrppslim = 100; 117 int icmp_rediraccept = 0; 118 int icmp_redirtimeout = 10 * 60; 119 120 static int icmperrpps_count = 0; 121 static struct timeval icmperrppslim_last; 122 123 static struct rttimer_queue *icmp_redirect_timeout_q = NULL; 124 struct cpumem *icmpcounters; 125 126 const struct sysctl_bounded_args icmpctl_vars[] = { 127 { ICMPCTL_MASKREPL, &icmpmaskrepl, 0, 1 }, 128 { ICMPCTL_BMCASTECHO, &icmpbmcastecho, 0, 1 }, 129 { ICMPCTL_ERRPPSLIMIT, &icmperrppslim, -1, INT_MAX }, 130 { ICMPCTL_REDIRACCEPT, &icmp_rediraccept, 0, 1 }, 131 { ICMPCTL_TSTAMPREPL, &icmptstamprepl, 0, 1 }, 132 }; 133 134 135 void icmp_mtudisc_timeout(struct rtentry *, struct rttimer *); 136 int icmp_ratelimit(const struct in_addr *, const int, const int); 137 void icmp_redirect_timeout(struct rtentry *, struct rttimer *); 138 int icmp_input_if(struct ifnet *, struct mbuf **, int *, int, int); 139 int icmp_sysctl_icmpstat(void *, size_t *, void *); 140 141 void 142 icmp_init(void) 143 { 144 icmpcounters = counters_alloc(icps_ncounters); 145 /* 146 * This is only useful if the user initializes redirtimeout to 147 * something other than zero. 148 */ 149 if (icmp_redirtimeout != 0) { 150 icmp_redirect_timeout_q = 151 rt_timer_queue_create(icmp_redirtimeout); 152 } 153 } 154 155 struct mbuf * 156 icmp_do_error(struct mbuf *n, int type, int code, u_int32_t dest, int destmtu) 157 { 158 struct ip *oip = mtod(n, struct ip *), *nip; 159 unsigned oiplen = oip->ip_hl << 2; 160 struct icmp *icp; 161 struct mbuf *m; 162 unsigned icmplen, mblen; 163 164 #ifdef ICMPPRINTFS 165 if (icmpprintfs) 166 printf("icmp_error(%x, %d, %d)\n", oip, type, code); 167 #endif 168 if (type != ICMP_REDIRECT) 169 icmpstat_inc(icps_error); 170 /* 171 * Don't send error if not the first fragment of message. 172 * Don't error if the old packet protocol was ICMP 173 * error message, only known informational types. 174 */ 175 if (oip->ip_off & htons(IP_OFFMASK)) 176 goto freeit; 177 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && 178 n->m_len >= oiplen + ICMP_MINLEN && 179 !ICMP_INFOTYPE(((struct icmp *) 180 ((caddr_t)oip + oiplen))->icmp_type)) { 181 icmpstat_inc(icps_oldicmp); 182 goto freeit; 183 } 184 /* Don't send error in response to a multicast or broadcast packet */ 185 if (n->m_flags & (M_BCAST|M_MCAST)) 186 goto freeit; 187 188 /* 189 * First, do a rate limitation check. 190 */ 191 if (icmp_ratelimit(&oip->ip_src, type, code)) { 192 icmpstat_inc(icps_toofreq); 193 goto freeit; 194 } 195 196 /* 197 * Now, formulate icmp message 198 */ 199 icmplen = oiplen + min(8, ntohs(oip->ip_len)); 200 /* 201 * Defend against mbuf chains shorter than oip->ip_len: 202 */ 203 mblen = 0; 204 for (m = n; m && (mblen < icmplen); m = m->m_next) 205 mblen += m->m_len; 206 icmplen = min(mblen, icmplen); 207 208 /* 209 * As we are not required to return everything we have, 210 * we return whatever we can return at ease. 211 * 212 * Note that ICMP datagrams longer than 576 octets are out of spec 213 * according to RFC1812; 214 */ 215 216 KASSERT(ICMP_MINLEN + sizeof (struct ip) <= MCLBYTES); 217 218 if (sizeof (struct ip) + icmplen + ICMP_MINLEN > MCLBYTES) 219 icmplen = MCLBYTES - ICMP_MINLEN - sizeof (struct ip); 220 221 m = m_gethdr(M_DONTWAIT, MT_HEADER); 222 if (m && ((sizeof (struct ip) + icmplen + ICMP_MINLEN + 223 sizeof(long) - 1) &~ (sizeof(long) - 1)) > MHLEN) { 224 MCLGET(m, M_DONTWAIT); 225 if ((m->m_flags & M_EXT) == 0) { 226 m_freem(m); 227 m = NULL; 228 } 229 } 230 if (m == NULL) 231 goto freeit; 232 /* keep in same rtable and preserve other pkthdr bits */ 233 m->m_pkthdr.ph_rtableid = n->m_pkthdr.ph_rtableid; 234 m->m_pkthdr.ph_ifidx = n->m_pkthdr.ph_ifidx; 235 /* move PF_GENERATED to new packet, if existent XXX preserve more? */ 236 if (n->m_pkthdr.pf.flags & PF_TAG_GENERATED) 237 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 238 m->m_pkthdr.len = m->m_len = icmplen + ICMP_MINLEN; 239 m_align(m, m->m_len); 240 icp = mtod(m, struct icmp *); 241 if ((u_int)type > ICMP_MAXTYPE) 242 panic("icmp_error"); 243 icmpstat_inc(icps_outhist + type); 244 icp->icmp_type = type; 245 if (type == ICMP_REDIRECT) 246 icp->icmp_gwaddr.s_addr = dest; 247 else { 248 icp->icmp_void = 0; 249 /* 250 * The following assignments assume an overlay with the 251 * zeroed icmp_void field. 252 */ 253 if (type == ICMP_PARAMPROB) { 254 icp->icmp_pptr = code; 255 code = 0; 256 } else if (type == ICMP_UNREACH && 257 code == ICMP_UNREACH_NEEDFRAG && destmtu) 258 icp->icmp_nextmtu = htons(destmtu); 259 } 260 261 icp->icmp_code = code; 262 m_copydata(n, 0, icmplen, &icp->icmp_ip); 263 264 /* 265 * Now, copy old ip header (without options) 266 * in front of icmp message. 267 */ 268 m = m_prepend(m, sizeof(struct ip), M_DONTWAIT); 269 if (m == NULL) 270 goto freeit; 271 nip = mtod(m, struct ip *); 272 /* ip_v set in ip_output */ 273 nip->ip_hl = sizeof(struct ip) >> 2; 274 nip->ip_tos = 0; 275 nip->ip_len = htons(m->m_len); 276 /* ip_id set in ip_output */ 277 nip->ip_off = 0; 278 /* ip_ttl set in icmp_reflect */ 279 nip->ip_p = IPPROTO_ICMP; 280 nip->ip_src = oip->ip_src; 281 nip->ip_dst = oip->ip_dst; 282 283 m_freem(n); 284 return (m); 285 286 freeit: 287 m_freem(n); 288 return (NULL); 289 } 290 291 /* 292 * Generate an error packet of type error 293 * in response to bad packet ip. 294 * 295 * The ip packet inside has ip_off and ip_len in host byte order. 296 */ 297 void 298 icmp_error(struct mbuf *n, int type, int code, u_int32_t dest, int destmtu) 299 { 300 struct mbuf *m; 301 302 m = icmp_do_error(n, type, code, dest, destmtu); 303 if (m != NULL) 304 if (!icmp_reflect(m, NULL, NULL)) 305 icmp_send(m, NULL); 306 } 307 308 /* 309 * Process a received ICMP message. 310 */ 311 int 312 icmp_input(struct mbuf **mp, int *offp, int proto, int af) 313 { 314 struct ifnet *ifp; 315 316 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 317 if (ifp == NULL) { 318 m_freemp(mp); 319 return IPPROTO_DONE; 320 } 321 322 proto = icmp_input_if(ifp, mp, offp, proto, af); 323 if_put(ifp); 324 return proto; 325 } 326 327 int 328 icmp_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto, int af) 329 { 330 struct mbuf *m = *mp; 331 int hlen = *offp; 332 struct icmp *icp; 333 struct ip *ip = mtod(m, struct ip *); 334 struct sockaddr_in sin; 335 int icmplen, i, code; 336 struct in_ifaddr *ia; 337 void (*ctlfunc)(int, struct sockaddr *, u_int, void *); 338 struct mbuf *opts; 339 340 /* 341 * Locate icmp structure in mbuf, and check 342 * that not corrupted and of at least minimum length. 343 */ 344 icmplen = ntohs(ip->ip_len) - hlen; 345 #ifdef ICMPPRINTFS 346 if (icmpprintfs) { 347 char dst[INET_ADDRSTRLEN], src[INET_ADDRSTRLEN]; 348 349 inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst)); 350 inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src)); 351 352 printf("icmp_input from %s to %s, len %d\n", src, dst, icmplen); 353 } 354 #endif 355 if (icmplen < ICMP_MINLEN) { 356 icmpstat_inc(icps_tooshort); 357 goto freeit; 358 } 359 i = hlen + min(icmplen, ICMP_ADVLENMAX); 360 if ((m = *mp = m_pullup(m, i)) == NULL) { 361 icmpstat_inc(icps_tooshort); 362 return IPPROTO_DONE; 363 } 364 ip = mtod(m, struct ip *); 365 if (in4_cksum(m, 0, hlen, icmplen)) { 366 icmpstat_inc(icps_checksum); 367 goto freeit; 368 } 369 370 icp = (struct icmp *)(mtod(m, caddr_t) + hlen); 371 #ifdef ICMPPRINTFS 372 /* 373 * Message type specific processing. 374 */ 375 if (icmpprintfs) 376 printf("icmp_input, type %d code %d\n", icp->icmp_type, 377 icp->icmp_code); 378 #endif 379 if (icp->icmp_type > ICMP_MAXTYPE) 380 goto raw; 381 #if NPF > 0 382 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 383 switch (icp->icmp_type) { 384 /* 385 * As pf_icmp_mapping() considers redirects belonging to a 386 * diverted connection, we must include it here. 387 */ 388 case ICMP_REDIRECT: 389 /* FALLTHROUGH */ 390 /* 391 * These ICMP types map to other connections. They must be 392 * delivered to pr_ctlinput() also for diverted connections. 393 */ 394 case ICMP_UNREACH: 395 case ICMP_TIMXCEED: 396 case ICMP_PARAMPROB: 397 case ICMP_SOURCEQUENCH: 398 /* 399 * Do not use the divert-to property of the TCP or UDP 400 * rule when doing the PCB lookup for the raw socket. 401 */ 402 m->m_pkthdr.pf.flags &=~ PF_TAG_DIVERTED; 403 break; 404 default: 405 goto raw; 406 } 407 } 408 #endif /* NPF */ 409 icmpstat_inc(icps_inhist + icp->icmp_type); 410 code = icp->icmp_code; 411 switch (icp->icmp_type) { 412 413 case ICMP_UNREACH: 414 switch (code) { 415 case ICMP_UNREACH_NET: 416 case ICMP_UNREACH_HOST: 417 case ICMP_UNREACH_PROTOCOL: 418 case ICMP_UNREACH_PORT: 419 case ICMP_UNREACH_SRCFAIL: 420 code += PRC_UNREACH_NET; 421 break; 422 423 case ICMP_UNREACH_NEEDFRAG: 424 code = PRC_MSGSIZE; 425 break; 426 427 case ICMP_UNREACH_NET_UNKNOWN: 428 case ICMP_UNREACH_NET_PROHIB: 429 case ICMP_UNREACH_TOSNET: 430 code = PRC_UNREACH_NET; 431 break; 432 433 case ICMP_UNREACH_HOST_UNKNOWN: 434 case ICMP_UNREACH_ISOLATED: 435 case ICMP_UNREACH_HOST_PROHIB: 436 case ICMP_UNREACH_TOSHOST: 437 case ICMP_UNREACH_FILTER_PROHIB: 438 case ICMP_UNREACH_HOST_PRECEDENCE: 439 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 440 code = PRC_UNREACH_HOST; 441 break; 442 443 default: 444 goto badcode; 445 } 446 goto deliver; 447 448 case ICMP_TIMXCEED: 449 if (code > 1) 450 goto badcode; 451 code += PRC_TIMXCEED_INTRANS; 452 goto deliver; 453 454 case ICMP_PARAMPROB: 455 if (code > 1) 456 goto badcode; 457 code = PRC_PARAMPROB; 458 goto deliver; 459 460 case ICMP_SOURCEQUENCH: 461 if (code) 462 goto badcode; 463 code = PRC_QUENCH; 464 deliver: 465 /* 466 * Problem with datagram; advise higher level routines. 467 */ 468 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 469 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 470 icmpstat_inc(icps_badlen); 471 goto freeit; 472 } 473 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr)) 474 goto badcode; 475 #ifdef INET6 476 /* Get more contiguous data for a v6 in v4 ICMP message. */ 477 if (icp->icmp_ip.ip_p == IPPROTO_IPV6) { 478 if (icmplen < ICMP_V6ADVLENMIN || 479 icmplen < ICMP_V6ADVLEN(icp)) { 480 icmpstat_inc(icps_badlen); 481 goto freeit; 482 } 483 } 484 #endif /* INET6 */ 485 #ifdef ICMPPRINTFS 486 if (icmpprintfs) 487 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); 488 #endif 489 memset(&sin, 0, sizeof(sin)); 490 sin.sin_family = AF_INET; 491 sin.sin_len = sizeof(struct sockaddr_in); 492 sin.sin_addr = icp->icmp_ip.ip_dst; 493 #if NCARP > 0 494 if (carp_lsdrop(ifp, m, AF_INET, &sin.sin_addr.s_addr, 495 &ip->ip_dst.s_addr, 1)) 496 goto freeit; 497 #endif 498 /* 499 * XXX if the packet contains [IPv4 AH TCP], we can't make a 500 * notification to TCP layer. 501 */ 502 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; 503 if (ctlfunc) 504 (*ctlfunc)(code, sintosa(&sin), m->m_pkthdr.ph_rtableid, 505 &icp->icmp_ip); 506 break; 507 508 badcode: 509 icmpstat_inc(icps_badcode); 510 break; 511 512 case ICMP_ECHO: 513 if (!icmpbmcastecho && 514 (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 515 icmpstat_inc(icps_bmcastecho); 516 break; 517 } 518 icp->icmp_type = ICMP_ECHOREPLY; 519 goto reflect; 520 521 case ICMP_TSTAMP: 522 if (icmptstamprepl == 0) 523 break; 524 525 if (!icmpbmcastecho && 526 (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 527 icmpstat_inc(icps_bmcastecho); 528 break; 529 } 530 if (icmplen < ICMP_TSLEN) { 531 icmpstat_inc(icps_badlen); 532 break; 533 } 534 icp->icmp_type = ICMP_TSTAMPREPLY; 535 icp->icmp_rtime = iptime(); 536 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ 537 goto reflect; 538 539 case ICMP_MASKREQ: 540 if (icmpmaskrepl == 0) 541 break; 542 if (icmplen < ICMP_MASKLEN) { 543 icmpstat_inc(icps_badlen); 544 break; 545 } 546 /* 547 * We are not able to respond with all ones broadcast 548 * unless we receive it over a point-to-point interface. 549 */ 550 memset(&sin, 0, sizeof(sin)); 551 sin.sin_family = AF_INET; 552 sin.sin_len = sizeof(struct sockaddr_in); 553 if (ip->ip_dst.s_addr == INADDR_BROADCAST || 554 ip->ip_dst.s_addr == INADDR_ANY) 555 sin.sin_addr = ip->ip_src; 556 else 557 sin.sin_addr = ip->ip_dst; 558 if (ifp == NULL) 559 break; 560 ia = ifatoia(ifaof_ifpforaddr(sintosa(&sin), ifp)); 561 if (ia == NULL) 562 break; 563 icp->icmp_type = ICMP_MASKREPLY; 564 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; 565 if (ip->ip_src.s_addr == 0) { 566 if (ifp->if_flags & IFF_BROADCAST) { 567 if (ia->ia_broadaddr.sin_addr.s_addr) 568 ip->ip_src = ia->ia_broadaddr.sin_addr; 569 else 570 ip->ip_src.s_addr = INADDR_BROADCAST; 571 } 572 else if (ifp->if_flags & IFF_POINTOPOINT) 573 ip->ip_src = ia->ia_dstaddr.sin_addr; 574 } 575 reflect: 576 #if NCARP > 0 577 if (carp_lsdrop(ifp, m, AF_INET, &ip->ip_src.s_addr, 578 &ip->ip_dst.s_addr, 1)) 579 goto freeit; 580 #endif 581 icmpstat_inc(icps_reflect); 582 icmpstat_inc(icps_outhist + icp->icmp_type); 583 if (!icmp_reflect(m, &opts, NULL)) { 584 icmp_send(m, opts); 585 m_free(opts); 586 } 587 return IPPROTO_DONE; 588 589 case ICMP_REDIRECT: 590 { 591 struct sockaddr_in sdst; 592 struct sockaddr_in sgw; 593 struct sockaddr_in ssrc; 594 struct rtentry *newrt = NULL; 595 596 if (icmp_rediraccept == 0 || ipforwarding == 1) 597 goto freeit; 598 if (code > 3) 599 goto badcode; 600 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 601 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 602 icmpstat_inc(icps_badlen); 603 break; 604 } 605 /* 606 * Short circuit routing redirects to force 607 * immediate change in the kernel's routing 608 * tables. The message is also handed to anyone 609 * listening on a raw socket (e.g. the routing 610 * daemon for use in updating its tables). 611 */ 612 memset(&sdst, 0, sizeof(sdst)); 613 memset(&sgw, 0, sizeof(sgw)); 614 memset(&ssrc, 0, sizeof(ssrc)); 615 sdst.sin_family = sgw.sin_family = ssrc.sin_family = AF_INET; 616 sdst.sin_len = sgw.sin_len = ssrc.sin_len = sizeof(sdst); 617 memcpy(&sdst.sin_addr, &icp->icmp_ip.ip_dst, 618 sizeof(sdst.sin_addr)); 619 memcpy(&sgw.sin_addr, &icp->icmp_gwaddr, 620 sizeof(sgw.sin_addr)); 621 memcpy(&ssrc.sin_addr, &ip->ip_src, 622 sizeof(ssrc.sin_addr)); 623 624 #ifdef ICMPPRINTFS 625 if (icmpprintfs) { 626 char gw[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 627 628 inet_ntop(AF_INET, &icp->icmp_gwaddr, gw, sizeof(gw)); 629 inet_ntop(AF_INET, &icp->icmp_ip.ip_dst, 630 dst, sizeof(dst)); 631 632 printf("redirect dst %s to %s\n", dst, gw); 633 } 634 #endif 635 636 #if NCARP > 0 637 if (carp_lsdrop(ifp, m, AF_INET, &sdst.sin_addr.s_addr, 638 &ip->ip_dst.s_addr, 1)) 639 goto freeit; 640 #endif 641 rtredirect(sintosa(&sdst), sintosa(&sgw), 642 sintosa(&ssrc), &newrt, m->m_pkthdr.ph_rtableid); 643 if (newrt != NULL && icmp_redirtimeout != 0) { 644 (void)rt_timer_add(newrt, icmp_redirect_timeout, 645 icmp_redirect_timeout_q, m->m_pkthdr.ph_rtableid); 646 } 647 if (newrt != NULL) 648 rtfree(newrt); 649 pfctlinput(PRC_REDIRECT_HOST, sintosa(&sdst)); 650 break; 651 } 652 /* 653 * No kernel processing for the following; 654 * just fall through to send to raw listener. 655 */ 656 case ICMP_ECHOREPLY: 657 case ICMP_ROUTERADVERT: 658 case ICMP_ROUTERSOLICIT: 659 case ICMP_TSTAMPREPLY: 660 case ICMP_IREQREPLY: 661 case ICMP_MASKREPLY: 662 case ICMP_TRACEROUTE: 663 case ICMP_DATACONVERR: 664 case ICMP_MOBILE_REDIRECT: 665 case ICMP_IPV6_WHEREAREYOU: 666 case ICMP_IPV6_IAMHERE: 667 case ICMP_MOBILE_REGREQUEST: 668 case ICMP_MOBILE_REGREPLY: 669 case ICMP_PHOTURIS: 670 default: 671 break; 672 } 673 674 raw: 675 return rip_input(mp, offp, proto, af); 676 677 freeit: 678 m_freem(m); 679 return IPPROTO_DONE; 680 } 681 682 /* 683 * Reflect the ip packet back to the source 684 */ 685 int 686 icmp_reflect(struct mbuf *m, struct mbuf **op, struct in_ifaddr *ia) 687 { 688 struct ip *ip = mtod(m, struct ip *); 689 struct mbuf *opts = NULL; 690 struct sockaddr_in sin; 691 struct rtentry *rt = NULL; 692 int optlen = (ip->ip_hl << 2) - sizeof(struct ip); 693 u_int rtableid; 694 u_int8_t pfflags; 695 696 if (!in_canforward(ip->ip_src) && 697 ((ip->ip_src.s_addr & IN_CLASSA_NET) != 698 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) { 699 m_freem(m); /* Bad return address */ 700 return (EHOSTUNREACH); 701 } 702 703 if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) { 704 m_freem(m); 705 return (ELOOP); 706 } 707 rtableid = m->m_pkthdr.ph_rtableid; 708 pfflags = m->m_pkthdr.pf.flags; 709 m_resethdr(m); 710 m->m_pkthdr.ph_rtableid = rtableid; 711 m->m_pkthdr.pf.flags = pfflags & PF_TAG_GENERATED; 712 713 /* 714 * If the incoming packet was addressed directly to us, 715 * use dst as the src for the reply. For broadcast, use 716 * the address which corresponds to the incoming interface. 717 */ 718 if (ia == NULL) { 719 memset(&sin, 0, sizeof(sin)); 720 sin.sin_len = sizeof(sin); 721 sin.sin_family = AF_INET; 722 sin.sin_addr = ip->ip_dst; 723 724 rt = rtalloc(sintosa(&sin), 0, rtableid); 725 if (rtisvalid(rt) && 726 ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) 727 ia = ifatoia(rt->rt_ifa); 728 } 729 730 /* 731 * The following happens if the packet was not addressed to us. 732 * Use the new source address and do a route lookup. If it fails 733 * drop the packet as there is no path to the host. 734 */ 735 if (ia == NULL) { 736 rtfree(rt); 737 738 memset(&sin, 0, sizeof(sin)); 739 sin.sin_len = sizeof(sin); 740 sin.sin_family = AF_INET; 741 sin.sin_addr = ip->ip_src; 742 743 /* keep packet in the original virtual instance */ 744 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 745 if (rt == NULL) { 746 ipstat_inc(ips_noroute); 747 m_freem(m); 748 return (EHOSTUNREACH); 749 } 750 751 ia = ifatoia(rt->rt_ifa); 752 } 753 754 ip->ip_dst = ip->ip_src; 755 ip->ip_ttl = MAXTTL; 756 757 /* It is safe to dereference ``ia'' iff ``rt'' is valid. */ 758 ip->ip_src = ia->ia_addr.sin_addr; 759 rtfree(rt); 760 761 if (optlen > 0) { 762 u_char *cp; 763 int opt, cnt; 764 u_int len; 765 766 /* 767 * Retrieve any source routing from the incoming packet; 768 * add on any record-route or timestamp options. 769 */ 770 cp = (u_char *) (ip + 1); 771 if (op && (opts = ip_srcroute(m)) == NULL && 772 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) { 773 opts->m_len = sizeof(struct in_addr); 774 mtod(opts, struct in_addr *)->s_addr = 0; 775 } 776 if (op && opts) { 777 #ifdef ICMPPRINTFS 778 if (icmpprintfs) 779 printf("icmp_reflect optlen %d rt %d => ", 780 optlen, opts->m_len); 781 #endif 782 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { 783 opt = cp[IPOPT_OPTVAL]; 784 if (opt == IPOPT_EOL) 785 break; 786 if (opt == IPOPT_NOP) 787 len = 1; 788 else { 789 if (cnt < IPOPT_OLEN + sizeof(*cp)) 790 break; 791 len = cp[IPOPT_OLEN]; 792 if (len < IPOPT_OLEN + sizeof(*cp) || 793 len > cnt) 794 break; 795 } 796 /* 797 * Should check for overflow, but it 798 * "can't happen" 799 */ 800 if (opt == IPOPT_RR || opt == IPOPT_TS || 801 opt == IPOPT_SECURITY) { 802 memcpy(mtod(opts, caddr_t) + 803 opts->m_len, cp, len); 804 opts->m_len += len; 805 } 806 } 807 /* Terminate & pad, if necessary */ 808 if ((cnt = opts->m_len % 4) != 0) 809 for (; cnt < 4; cnt++) { 810 *(mtod(opts, caddr_t) + opts->m_len) = 811 IPOPT_EOL; 812 opts->m_len++; 813 } 814 #ifdef ICMPPRINTFS 815 if (icmpprintfs) 816 printf("%d\n", opts->m_len); 817 #endif 818 } 819 ip_stripoptions(m); 820 } 821 m->m_flags &= ~(M_BCAST|M_MCAST); 822 if (op) 823 *op = opts; 824 825 return (0); 826 } 827 828 /* 829 * Send an icmp packet back to the ip level 830 */ 831 void 832 icmp_send(struct mbuf *m, struct mbuf *opts) 833 { 834 struct ip *ip = mtod(m, struct ip *); 835 int hlen; 836 struct icmp *icp; 837 838 hlen = ip->ip_hl << 2; 839 icp = (struct icmp *)(mtod(m, caddr_t) + hlen); 840 icp->icmp_cksum = 0; 841 m->m_pkthdr.csum_flags = M_ICMP_CSUM_OUT; 842 #ifdef ICMPPRINTFS 843 if (icmpprintfs) { 844 char dst[INET_ADDRSTRLEN], src[INET_ADDRSTRLEN]; 845 846 inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst)); 847 inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src)); 848 849 printf("icmp_send dst %s src %s\n", dst, src); 850 } 851 #endif 852 /* 853 * ip_send() cannot handle IP options properly. So in case we have 854 * options fill out the IP header here and use ip_send_raw() instead. 855 */ 856 if (opts != NULL) { 857 m = ip_insertoptions(m, opts, &hlen); 858 ip = mtod(m, struct ip *); 859 ip->ip_hl = (hlen >> 2); 860 ip->ip_v = IPVERSION; 861 ip->ip_off &= htons(IP_DF); 862 ip->ip_id = htons(ip_randomid()); 863 ipstat_inc(ips_localout); 864 ip_send_raw(m); 865 } else 866 ip_send(m); 867 } 868 869 u_int32_t 870 iptime(void) 871 { 872 struct timeval atv; 873 u_long t; 874 875 microtime(&atv); 876 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; 877 return (htonl(t)); 878 } 879 880 int 881 icmp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 882 size_t newlen) 883 { 884 int error; 885 886 /* All sysctl names at this level are terminal. */ 887 if (namelen != 1) 888 return (ENOTDIR); 889 890 switch (name[0]) { 891 case ICMPCTL_REDIRTIMEOUT: 892 893 NET_LOCK(); 894 error = sysctl_int(oldp, oldlenp, newp, newlen, 895 &icmp_redirtimeout); 896 if (icmp_redirect_timeout_q != NULL) { 897 if (icmp_redirtimeout == 0) { 898 rt_timer_queue_destroy(icmp_redirect_timeout_q); 899 icmp_redirect_timeout_q = NULL; 900 } else 901 rt_timer_queue_change(icmp_redirect_timeout_q, 902 icmp_redirtimeout); 903 } else if (icmp_redirtimeout > 0) { 904 icmp_redirect_timeout_q = 905 rt_timer_queue_create(icmp_redirtimeout); 906 } 907 NET_UNLOCK(); 908 break; 909 910 case ICMPCTL_STATS: 911 error = icmp_sysctl_icmpstat(oldp, oldlenp, newp); 912 break; 913 914 default: 915 NET_LOCK(); 916 error = sysctl_bounded_arr(icmpctl_vars, nitems(icmpctl_vars), 917 name, namelen, oldp, oldlenp, newp, newlen); 918 NET_UNLOCK(); 919 break; 920 } 921 922 return (error); 923 } 924 925 int 926 icmp_sysctl_icmpstat(void *oldp, size_t *oldlenp, void *newp) 927 { 928 uint64_t counters[icps_ncounters]; 929 struct icmpstat icmpstat; 930 u_long *words = (u_long *)&icmpstat; 931 int i; 932 933 CTASSERT(sizeof(icmpstat) == (nitems(counters) * sizeof(u_long))); 934 memset(&icmpstat, 0, sizeof icmpstat); 935 counters_read(icmpcounters, counters, nitems(counters)); 936 937 for (i = 0; i < nitems(counters); i++) 938 words[i] = (u_long)counters[i]; 939 940 return (sysctl_rdstruct(oldp, oldlenp, newp, 941 &icmpstat, sizeof(icmpstat))); 942 } 943 944 struct rtentry * 945 icmp_mtudisc_clone(struct in_addr dst, u_int rtableid, int ipsec) 946 { 947 struct sockaddr_in sin; 948 struct rtentry *rt; 949 int error; 950 951 memset(&sin, 0, sizeof(sin)); 952 sin.sin_family = AF_INET; 953 sin.sin_len = sizeof(sin); 954 sin.sin_addr = dst; 955 956 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 957 958 /* Check if the route is actually usable */ 959 if (!rtisvalid(rt)) 960 goto bad; 961 /* IPsec needs the route only for PMTU, it can use reject for that */ 962 if (!ipsec && (rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE))) 963 goto bad; 964 965 /* 966 * No PMTU for local routes and permanent neighbors, 967 * ARP and NDP use the same expire timer as the route. 968 */ 969 if (ISSET(rt->rt_flags, RTF_LOCAL) || 970 (ISSET(rt->rt_flags, RTF_LLINFO) && rt->rt_expire == 0)) 971 goto bad; 972 973 /* If we didn't get a host route, allocate one */ 974 if ((rt->rt_flags & RTF_HOST) == 0) { 975 struct rtentry *nrt; 976 struct rt_addrinfo info; 977 struct sockaddr_rtlabel sa_rl; 978 979 memset(&info, 0, sizeof(info)); 980 info.rti_ifa = rt->rt_ifa; 981 info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC; 982 info.rti_info[RTAX_DST] = sintosa(&sin); 983 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 984 info.rti_info[RTAX_LABEL] = 985 rtlabel_id2sa(rt->rt_labelid, &sa_rl); 986 987 error = rtrequest(RTM_ADD, &info, rt->rt_priority, &nrt, 988 rtableid); 989 if (error) 990 goto bad; 991 nrt->rt_rmx = rt->rt_rmx; 992 rtfree(rt); 993 rt = nrt; 994 rtm_send(rt, RTM_ADD, 0, rtableid); 995 } 996 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q, 997 rtableid); 998 if (error) 999 goto bad; 1000 1001 return (rt); 1002 bad: 1003 rtfree(rt); 1004 return (NULL); 1005 } 1006 1007 /* Table of common MTUs: */ 1008 static const u_short mtu_table[] = { 1009 65535, 65280, 32000, 17914, 9180, 8166, 1010 4352, 2002, 1492, 1006, 508, 296, 68, 0 1011 }; 1012 1013 void 1014 icmp_mtudisc(struct icmp *icp, u_int rtableid) 1015 { 1016 struct rtentry *rt; 1017 struct ifnet *ifp; 1018 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */ 1019 1020 rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid, 0); 1021 if (rt == NULL) 1022 return; 1023 1024 ifp = if_get(rt->rt_ifidx); 1025 if (ifp == NULL) { 1026 rtfree(rt); 1027 return; 1028 } 1029 1030 if (mtu == 0) { 1031 int i = 0; 1032 1033 mtu = ntohs(icp->icmp_ip.ip_len); 1034 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */ 1035 if (mtu > rt->rt_mtu && rt->rt_mtu != 0) 1036 mtu -= (icp->icmp_ip.ip_hl << 2); 1037 1038 /* If we still can't guess a value, try the route */ 1039 if (mtu == 0) { 1040 mtu = rt->rt_mtu; 1041 1042 /* If no route mtu, default to the interface mtu */ 1043 1044 if (mtu == 0) 1045 mtu = ifp->if_mtu; 1046 } 1047 1048 for (i = 0; i < nitems(mtu_table); i++) 1049 if (mtu > mtu_table[i]) { 1050 mtu = mtu_table[i]; 1051 break; 1052 } 1053 } 1054 1055 /* 1056 * XXX: RTV_MTU is overloaded, since the admin can set it 1057 * to turn off PMTU for a route, and the kernel can 1058 * set it to indicate a serious problem with PMTU 1059 * on a route. We should be using a separate flag 1060 * for the kernel to indicate this. 1061 */ 1062 if ((rt->rt_locks & RTV_MTU) == 0) { 1063 if (mtu < 296 || mtu > ifp->if_mtu) 1064 rt->rt_locks |= RTV_MTU; 1065 else if (rt->rt_mtu > mtu || rt->rt_mtu == 0) 1066 rt->rt_mtu = mtu; 1067 } 1068 1069 if_put(ifp); 1070 rtfree(rt); 1071 } 1072 1073 void 1074 icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r) 1075 { 1076 struct ifnet *ifp; 1077 1078 NET_ASSERT_LOCKED(); 1079 1080 ifp = if_get(rt->rt_ifidx); 1081 if (ifp == NULL) 1082 return; 1083 1084 if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) { 1085 void (*ctlfunc)(int, struct sockaddr *, u_int, void *); 1086 struct sockaddr_in sin; 1087 1088 sin = *satosin(rt_key(rt)); 1089 1090 rtdeletemsg(rt, ifp, r->rtt_tableid); 1091 1092 /* Notify TCP layer of increased Path MTU estimate */ 1093 ctlfunc = inetsw[ip_protox[IPPROTO_TCP]].pr_ctlinput; 1094 if (ctlfunc) 1095 (*ctlfunc)(PRC_MTUINC, sintosa(&sin), 1096 r->rtt_tableid, NULL); 1097 } else { 1098 if ((rt->rt_locks & RTV_MTU) == 0) 1099 rt->rt_mtu = 0; 1100 } 1101 1102 if_put(ifp); 1103 } 1104 1105 /* 1106 * Perform rate limit check. 1107 * Returns 0 if it is okay to send the icmp packet. 1108 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate 1109 * limitation. 1110 * 1111 * XXX per-destination/type check necessary? 1112 */ 1113 int 1114 icmp_ratelimit(const struct in_addr *dst, const int type, const int code) 1115 { 1116 /* PPS limit */ 1117 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count, 1118 icmperrppslim)) 1119 return 1; /* The packet is subject to rate limit */ 1120 return 0; /* okay to send */ 1121 } 1122 1123 void 1124 icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r) 1125 { 1126 struct ifnet *ifp; 1127 1128 NET_ASSERT_LOCKED(); 1129 1130 ifp = if_get(rt->rt_ifidx); 1131 if (ifp == NULL) 1132 return; 1133 1134 if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) { 1135 rtdeletemsg(rt, ifp, r->rtt_tableid); 1136 } 1137 1138 if_put(ifp); 1139 } 1140 1141 int 1142 icmp_do_exthdr(struct mbuf *m, u_int16_t class, u_int8_t ctype, void *buf, 1143 size_t len) 1144 { 1145 struct ip *ip = mtod(m, struct ip *); 1146 int hlen, off; 1147 struct mbuf *n; 1148 struct icmp *icp; 1149 struct icmp_ext_hdr *ieh; 1150 struct { 1151 struct icmp_ext_hdr ieh; 1152 struct icmp_ext_obj_hdr ieo; 1153 } hdr; 1154 1155 hlen = ip->ip_hl << 2; 1156 icp = (struct icmp *)(mtod(m, caddr_t) + hlen); 1157 if (icp->icmp_type != ICMP_TIMXCEED && icp->icmp_type != ICMP_UNREACH && 1158 icp->icmp_type != ICMP_PARAMPROB) 1159 /* exthdr not supported */ 1160 return (0); 1161 1162 if (icp->icmp_length != 0) 1163 /* exthdr already present, giving up */ 1164 return (0); 1165 1166 /* the actual offset starts after the common ICMP header */ 1167 hlen += ICMP_MINLEN; 1168 /* exthdr must start on a word boundary */ 1169 off = roundup(ntohs(ip->ip_len) - hlen, sizeof(u_int32_t)); 1170 /* ... and at an offset of ICMP_EXT_OFFSET or bigger */ 1171 off = max(off, ICMP_EXT_OFFSET); 1172 icp->icmp_length = off / sizeof(u_int32_t); 1173 1174 memset(&hdr, 0, sizeof(hdr)); 1175 hdr.ieh.ieh_version = ICMP_EXT_HDR_VERSION; 1176 hdr.ieo.ieo_length = htons(sizeof(struct icmp_ext_obj_hdr) + len); 1177 hdr.ieo.ieo_cnum = class; 1178 hdr.ieo.ieo_ctype = ctype; 1179 1180 if (m_copyback(m, hlen + off, sizeof(hdr), &hdr, M_NOWAIT) || 1181 m_copyback(m, hlen + off + sizeof(hdr), len, buf, M_NOWAIT)) { 1182 m_freem(m); 1183 return (ENOBUFS); 1184 } 1185 1186 /* calculate checksum */ 1187 n = m_getptr(m, hlen + off, &off); 1188 if (n == NULL) 1189 panic("icmp_do_exthdr: m_getptr failure"); 1190 ieh = (struct icmp_ext_hdr *)(mtod(n, caddr_t) + off); 1191 ieh->ieh_cksum = in4_cksum(n, 0, off, sizeof(hdr) + len); 1192 1193 ip->ip_len = htons(m->m_pkthdr.len); 1194 1195 return (0); 1196 } 1197