1 /* $OpenBSD: ip_icmp.c,v 1.192 2023/09/16 09:33:27 mpi Exp $ */ 2 /* $NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)COPYRIGHT 1.1 (NRL) 17 January 1995 33 * 34 * NRL grants permission for redistribution and use in source and binary 35 * forms, with or without modification, of the software and documentation 36 * created at NRL provided that the following conditions are met: 37 * 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgements: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * This product includes software developed at the Information 48 * Technology Division, US Naval Research Laboratory. 49 * 4. Neither the name of the NRL nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS 54 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 55 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 56 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NRL OR 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 59 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 60 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 62 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 63 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 64 * 65 * The views and conclusions contained in the software and documentation 66 * are those of the authors and should not be interpreted as representing 67 * official policies, either expressed or implied, of the US Naval 68 * Research Laboratory (NRL). 69 */ 70 71 #include "carp.h" 72 #include "pf.h" 73 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/mbuf.h> 77 #include <sys/protosw.h> 78 #include <sys/socket.h> 79 #include <sys/sysctl.h> 80 81 #include <net/if.h> 82 #include <net/if_var.h> 83 #include <net/route.h> 84 85 #include <netinet/in.h> 86 #include <netinet/in_systm.h> 87 #include <netinet/in_var.h> 88 #include <netinet/ip.h> 89 #include <netinet/ip_icmp.h> 90 #include <netinet/ip_var.h> 91 #include <netinet/icmp_var.h> 92 93 #if NCARP > 0 94 #include <net/if_types.h> 95 #include <netinet/ip_carp.h> 96 #endif 97 98 #if NPF > 0 99 #include <net/pfvar.h> 100 #endif 101 102 /* 103 * ICMP routines: error generation, receive packet processing, and 104 * routines to turnaround packets back to the originator, and 105 * host table maintenance routines. 106 */ 107 108 #ifdef ICMPPRINTFS 109 int icmpprintfs = 0; /* Settable from ddb */ 110 #endif 111 112 /* values controllable via sysctl */ 113 int icmpmaskrepl = 0; 114 int icmpbmcastecho = 0; 115 int icmptstamprepl = 1; 116 int icmperrppslim = 100; 117 int icmp_rediraccept = 0; 118 int icmp_redirtimeout = 10 * 60; 119 120 static int icmperrpps_count = 0; 121 static struct timeval icmperrppslim_last; 122 123 struct rttimer_queue ip_mtudisc_timeout_q; 124 struct rttimer_queue icmp_redirect_timeout_q; 125 struct cpumem *icmpcounters; 126 127 const struct sysctl_bounded_args icmpctl_vars[] = { 128 { ICMPCTL_MASKREPL, &icmpmaskrepl, 0, 1 }, 129 { ICMPCTL_BMCASTECHO, &icmpbmcastecho, 0, 1 }, 130 { ICMPCTL_ERRPPSLIMIT, &icmperrppslim, -1, INT_MAX }, 131 { ICMPCTL_REDIRACCEPT, &icmp_rediraccept, 0, 1 }, 132 { ICMPCTL_TSTAMPREPL, &icmptstamprepl, 0, 1 }, 133 }; 134 135 136 void icmp_mtudisc_timeout(struct rtentry *, u_int); 137 int icmp_ratelimit(const struct in_addr *, const int, const int); 138 int icmp_input_if(struct ifnet *, struct mbuf **, int *, int, int); 139 int icmp_sysctl_icmpstat(void *, size_t *, void *); 140 141 void 142 icmp_init(void) 143 { 144 rt_timer_queue_init(&ip_mtudisc_timeout_q, ip_mtudisc_timeout, 145 &icmp_mtudisc_timeout); 146 rt_timer_queue_init(&icmp_redirect_timeout_q, icmp_redirtimeout, 147 NULL); 148 icmpcounters = counters_alloc(icps_ncounters); 149 } 150 151 struct mbuf * 152 icmp_do_error(struct mbuf *n, int type, int code, u_int32_t dest, int destmtu) 153 { 154 struct ip *oip = mtod(n, struct ip *), *nip; 155 unsigned oiplen = oip->ip_hl << 2; 156 struct icmp *icp; 157 struct mbuf *m; 158 unsigned icmplen, mblen; 159 160 #ifdef ICMPPRINTFS 161 if (icmpprintfs) 162 printf("icmp_error(%x, %d, %d)\n", oip, type, code); 163 #endif 164 if (type != ICMP_REDIRECT) 165 icmpstat_inc(icps_error); 166 /* 167 * Don't send error if not the first fragment of message. 168 * Don't error if the old packet protocol was ICMP 169 * error message, only known informational types. 170 */ 171 if (oip->ip_off & htons(IP_OFFMASK)) 172 goto freeit; 173 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && 174 n->m_len >= oiplen + ICMP_MINLEN && 175 !ICMP_INFOTYPE(((struct icmp *) 176 ((caddr_t)oip + oiplen))->icmp_type)) { 177 icmpstat_inc(icps_oldicmp); 178 goto freeit; 179 } 180 /* Don't send error in response to a multicast or broadcast packet */ 181 if (n->m_flags & (M_BCAST|M_MCAST)) 182 goto freeit; 183 184 /* 185 * First, do a rate limitation check. 186 */ 187 if (icmp_ratelimit(&oip->ip_src, type, code)) { 188 icmpstat_inc(icps_toofreq); 189 goto freeit; 190 } 191 192 /* 193 * Now, formulate icmp message 194 */ 195 icmplen = oiplen + min(8, ntohs(oip->ip_len)); 196 /* 197 * Defend against mbuf chains shorter than oip->ip_len: 198 */ 199 mblen = 0; 200 for (m = n; m && (mblen < icmplen); m = m->m_next) 201 mblen += m->m_len; 202 icmplen = min(mblen, icmplen); 203 204 /* 205 * As we are not required to return everything we have, 206 * we return whatever we can return at ease. 207 * 208 * Note that ICMP datagrams longer than 576 octets are out of spec 209 * according to RFC1812; 210 */ 211 212 KASSERT(ICMP_MINLEN + sizeof (struct ip) <= MCLBYTES); 213 214 if (sizeof (struct ip) + icmplen + ICMP_MINLEN > MCLBYTES) 215 icmplen = MCLBYTES - ICMP_MINLEN - sizeof (struct ip); 216 217 m = m_gethdr(M_DONTWAIT, MT_HEADER); 218 if (m && ((sizeof (struct ip) + icmplen + ICMP_MINLEN + 219 sizeof(long) - 1) &~ (sizeof(long) - 1)) > MHLEN) { 220 MCLGET(m, M_DONTWAIT); 221 if ((m->m_flags & M_EXT) == 0) { 222 m_freem(m); 223 m = NULL; 224 } 225 } 226 if (m == NULL) 227 goto freeit; 228 /* keep in same rtable and preserve other pkthdr bits */ 229 m->m_pkthdr.ph_rtableid = n->m_pkthdr.ph_rtableid; 230 m->m_pkthdr.ph_ifidx = n->m_pkthdr.ph_ifidx; 231 /* move PF_GENERATED to new packet, if existent XXX preserve more? */ 232 if (n->m_pkthdr.pf.flags & PF_TAG_GENERATED) 233 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 234 m->m_pkthdr.len = m->m_len = icmplen + ICMP_MINLEN; 235 m_align(m, m->m_len); 236 icp = mtod(m, struct icmp *); 237 if ((u_int)type > ICMP_MAXTYPE) 238 panic("icmp_error"); 239 icmpstat_inc(icps_outhist + type); 240 icp->icmp_type = type; 241 if (type == ICMP_REDIRECT) 242 icp->icmp_gwaddr.s_addr = dest; 243 else { 244 icp->icmp_void = 0; 245 /* 246 * The following assignments assume an overlay with the 247 * zeroed icmp_void field. 248 */ 249 if (type == ICMP_PARAMPROB) { 250 icp->icmp_pptr = code; 251 code = 0; 252 } else if (type == ICMP_UNREACH && 253 code == ICMP_UNREACH_NEEDFRAG && destmtu) 254 icp->icmp_nextmtu = htons(destmtu); 255 } 256 257 icp->icmp_code = code; 258 m_copydata(n, 0, icmplen, &icp->icmp_ip); 259 260 /* 261 * Now, copy old ip header (without options) 262 * in front of icmp message. 263 */ 264 m = m_prepend(m, sizeof(struct ip), M_DONTWAIT); 265 if (m == NULL) 266 goto freeit; 267 nip = mtod(m, struct ip *); 268 /* ip_v set in ip_output */ 269 nip->ip_hl = sizeof(struct ip) >> 2; 270 nip->ip_tos = 0; 271 nip->ip_len = htons(m->m_len); 272 /* ip_id set in ip_output */ 273 nip->ip_off = 0; 274 /* ip_ttl set in icmp_reflect */ 275 nip->ip_p = IPPROTO_ICMP; 276 nip->ip_src = oip->ip_src; 277 nip->ip_dst = oip->ip_dst; 278 279 m_freem(n); 280 return (m); 281 282 freeit: 283 m_freem(n); 284 return (NULL); 285 } 286 287 /* 288 * Generate an error packet of type error 289 * in response to bad packet ip. 290 * 291 * The ip packet inside has ip_off and ip_len in host byte order. 292 */ 293 void 294 icmp_error(struct mbuf *n, int type, int code, u_int32_t dest, int destmtu) 295 { 296 struct mbuf *m; 297 298 m = icmp_do_error(n, type, code, dest, destmtu); 299 if (m != NULL) 300 if (!icmp_reflect(m, NULL, NULL)) 301 icmp_send(m, NULL); 302 } 303 304 /* 305 * Process a received ICMP message. 306 */ 307 int 308 icmp_input(struct mbuf **mp, int *offp, int proto, int af) 309 { 310 struct ifnet *ifp; 311 312 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 313 if (ifp == NULL) { 314 m_freemp(mp); 315 return IPPROTO_DONE; 316 } 317 318 proto = icmp_input_if(ifp, mp, offp, proto, af); 319 if_put(ifp); 320 return proto; 321 } 322 323 int 324 icmp_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto, int af) 325 { 326 struct mbuf *m = *mp; 327 int hlen = *offp; 328 struct icmp *icp; 329 struct ip *ip = mtod(m, struct ip *); 330 struct sockaddr_in sin; 331 int icmplen, i, code; 332 struct in_ifaddr *ia; 333 void (*ctlfunc)(int, struct sockaddr *, u_int, void *); 334 struct mbuf *opts; 335 336 /* 337 * Locate icmp structure in mbuf, and check 338 * that not corrupted and of at least minimum length. 339 */ 340 icmplen = ntohs(ip->ip_len) - hlen; 341 #ifdef ICMPPRINTFS 342 if (icmpprintfs) { 343 char dst[INET_ADDRSTRLEN], src[INET_ADDRSTRLEN]; 344 345 inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst)); 346 inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src)); 347 348 printf("icmp_input from %s to %s, len %d\n", src, dst, icmplen); 349 } 350 #endif 351 if (icmplen < ICMP_MINLEN) { 352 icmpstat_inc(icps_tooshort); 353 goto freeit; 354 } 355 i = hlen + min(icmplen, ICMP_ADVLENMAX); 356 if ((m = *mp = m_pullup(m, i)) == NULL) { 357 icmpstat_inc(icps_tooshort); 358 return IPPROTO_DONE; 359 } 360 ip = mtod(m, struct ip *); 361 if (in4_cksum(m, 0, hlen, icmplen)) { 362 icmpstat_inc(icps_checksum); 363 goto freeit; 364 } 365 366 icp = (struct icmp *)(mtod(m, caddr_t) + hlen); 367 #ifdef ICMPPRINTFS 368 /* 369 * Message type specific processing. 370 */ 371 if (icmpprintfs) 372 printf("icmp_input, type %d code %d\n", icp->icmp_type, 373 icp->icmp_code); 374 #endif 375 if (icp->icmp_type > ICMP_MAXTYPE) 376 goto raw; 377 #if NPF > 0 378 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) { 379 switch (icp->icmp_type) { 380 /* 381 * As pf_icmp_mapping() considers redirects belonging to a 382 * diverted connection, we must include it here. 383 */ 384 case ICMP_REDIRECT: 385 /* FALLTHROUGH */ 386 /* 387 * These ICMP types map to other connections. They must be 388 * delivered to pr_ctlinput() also for diverted connections. 389 */ 390 case ICMP_UNREACH: 391 case ICMP_TIMXCEED: 392 case ICMP_PARAMPROB: 393 case ICMP_SOURCEQUENCH: 394 /* 395 * Do not use the divert-to property of the TCP or UDP 396 * rule when doing the PCB lookup for the raw socket. 397 */ 398 m->m_pkthdr.pf.flags &=~ PF_TAG_DIVERTED; 399 break; 400 default: 401 goto raw; 402 } 403 } 404 #endif /* NPF */ 405 icmpstat_inc(icps_inhist + icp->icmp_type); 406 code = icp->icmp_code; 407 switch (icp->icmp_type) { 408 409 case ICMP_UNREACH: 410 switch (code) { 411 case ICMP_UNREACH_NET: 412 case ICMP_UNREACH_HOST: 413 case ICMP_UNREACH_PROTOCOL: 414 case ICMP_UNREACH_PORT: 415 case ICMP_UNREACH_SRCFAIL: 416 code += PRC_UNREACH_NET; 417 break; 418 419 case ICMP_UNREACH_NEEDFRAG: 420 code = PRC_MSGSIZE; 421 break; 422 423 case ICMP_UNREACH_NET_UNKNOWN: 424 case ICMP_UNREACH_NET_PROHIB: 425 case ICMP_UNREACH_TOSNET: 426 code = PRC_UNREACH_NET; 427 break; 428 429 case ICMP_UNREACH_HOST_UNKNOWN: 430 case ICMP_UNREACH_ISOLATED: 431 case ICMP_UNREACH_HOST_PROHIB: 432 case ICMP_UNREACH_TOSHOST: 433 case ICMP_UNREACH_FILTER_PROHIB: 434 case ICMP_UNREACH_HOST_PRECEDENCE: 435 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 436 code = PRC_UNREACH_HOST; 437 break; 438 439 default: 440 goto badcode; 441 } 442 goto deliver; 443 444 case ICMP_TIMXCEED: 445 if (code > 1) 446 goto badcode; 447 code += PRC_TIMXCEED_INTRANS; 448 goto deliver; 449 450 case ICMP_PARAMPROB: 451 if (code > 1) 452 goto badcode; 453 code = PRC_PARAMPROB; 454 goto deliver; 455 456 case ICMP_SOURCEQUENCH: 457 if (code) 458 goto badcode; 459 code = PRC_QUENCH; 460 deliver: 461 /* 462 * Problem with datagram; advise higher level routines. 463 */ 464 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 465 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 466 icmpstat_inc(icps_badlen); 467 goto freeit; 468 } 469 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr)) 470 goto badcode; 471 #ifdef INET6 472 /* Get more contiguous data for a v6 in v4 ICMP message. */ 473 if (icp->icmp_ip.ip_p == IPPROTO_IPV6) { 474 if (icmplen < ICMP_V6ADVLENMIN || 475 icmplen < ICMP_V6ADVLEN(icp)) { 476 icmpstat_inc(icps_badlen); 477 goto freeit; 478 } 479 } 480 #endif /* INET6 */ 481 #ifdef ICMPPRINTFS 482 if (icmpprintfs) 483 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); 484 #endif 485 memset(&sin, 0, sizeof(sin)); 486 sin.sin_family = AF_INET; 487 sin.sin_len = sizeof(struct sockaddr_in); 488 sin.sin_addr = icp->icmp_ip.ip_dst; 489 #if NCARP > 0 490 if (carp_lsdrop(ifp, m, AF_INET, &sin.sin_addr.s_addr, 491 &ip->ip_dst.s_addr, 1)) 492 goto freeit; 493 #endif 494 /* 495 * XXX if the packet contains [IPv4 AH TCP], we can't make a 496 * notification to TCP layer. 497 */ 498 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; 499 if (ctlfunc) 500 (*ctlfunc)(code, sintosa(&sin), m->m_pkthdr.ph_rtableid, 501 &icp->icmp_ip); 502 break; 503 504 badcode: 505 icmpstat_inc(icps_badcode); 506 break; 507 508 case ICMP_ECHO: 509 if (!icmpbmcastecho && 510 (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 511 icmpstat_inc(icps_bmcastecho); 512 break; 513 } 514 icp->icmp_type = ICMP_ECHOREPLY; 515 goto reflect; 516 517 case ICMP_TSTAMP: 518 if (icmptstamprepl == 0) 519 break; 520 521 if (!icmpbmcastecho && 522 (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 523 icmpstat_inc(icps_bmcastecho); 524 break; 525 } 526 if (icmplen < ICMP_TSLEN) { 527 icmpstat_inc(icps_badlen); 528 break; 529 } 530 icp->icmp_type = ICMP_TSTAMPREPLY; 531 icp->icmp_rtime = iptime(); 532 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ 533 goto reflect; 534 535 case ICMP_MASKREQ: 536 if (icmpmaskrepl == 0) 537 break; 538 if (icmplen < ICMP_MASKLEN) { 539 icmpstat_inc(icps_badlen); 540 break; 541 } 542 /* 543 * We are not able to respond with all ones broadcast 544 * unless we receive it over a point-to-point interface. 545 */ 546 memset(&sin, 0, sizeof(sin)); 547 sin.sin_family = AF_INET; 548 sin.sin_len = sizeof(struct sockaddr_in); 549 if (ip->ip_dst.s_addr == INADDR_BROADCAST || 550 ip->ip_dst.s_addr == INADDR_ANY) 551 sin.sin_addr = ip->ip_src; 552 else 553 sin.sin_addr = ip->ip_dst; 554 if (ifp == NULL) 555 break; 556 ia = ifatoia(ifaof_ifpforaddr(sintosa(&sin), ifp)); 557 if (ia == NULL) 558 break; 559 icp->icmp_type = ICMP_MASKREPLY; 560 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; 561 if (ip->ip_src.s_addr == 0) { 562 if (ifp->if_flags & IFF_BROADCAST) { 563 if (ia->ia_broadaddr.sin_addr.s_addr) 564 ip->ip_src = ia->ia_broadaddr.sin_addr; 565 else 566 ip->ip_src.s_addr = INADDR_BROADCAST; 567 } 568 else if (ifp->if_flags & IFF_POINTOPOINT) 569 ip->ip_src = ia->ia_dstaddr.sin_addr; 570 } 571 reflect: 572 #if NCARP > 0 573 if (carp_lsdrop(ifp, m, AF_INET, &ip->ip_src.s_addr, 574 &ip->ip_dst.s_addr, 1)) 575 goto freeit; 576 #endif 577 icmpstat_inc(icps_reflect); 578 icmpstat_inc(icps_outhist + icp->icmp_type); 579 if (!icmp_reflect(m, &opts, NULL)) { 580 icmp_send(m, opts); 581 m_free(opts); 582 } 583 return IPPROTO_DONE; 584 585 case ICMP_REDIRECT: 586 { 587 struct sockaddr_in sdst; 588 struct sockaddr_in sgw; 589 struct sockaddr_in ssrc; 590 struct rtentry *newrt = NULL; 591 592 if (icmp_rediraccept == 0 || ipforwarding == 1) 593 goto freeit; 594 if (code > 3) 595 goto badcode; 596 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 597 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { 598 icmpstat_inc(icps_badlen); 599 break; 600 } 601 /* 602 * Short circuit routing redirects to force 603 * immediate change in the kernel's routing 604 * tables. The message is also handed to anyone 605 * listening on a raw socket (e.g. the routing 606 * daemon for use in updating its tables). 607 */ 608 memset(&sdst, 0, sizeof(sdst)); 609 memset(&sgw, 0, sizeof(sgw)); 610 memset(&ssrc, 0, sizeof(ssrc)); 611 sdst.sin_family = sgw.sin_family = ssrc.sin_family = AF_INET; 612 sdst.sin_len = sgw.sin_len = ssrc.sin_len = sizeof(sdst); 613 memcpy(&sdst.sin_addr, &icp->icmp_ip.ip_dst, 614 sizeof(sdst.sin_addr)); 615 memcpy(&sgw.sin_addr, &icp->icmp_gwaddr, 616 sizeof(sgw.sin_addr)); 617 memcpy(&ssrc.sin_addr, &ip->ip_src, 618 sizeof(ssrc.sin_addr)); 619 620 #ifdef ICMPPRINTFS 621 if (icmpprintfs) { 622 char gw[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; 623 624 inet_ntop(AF_INET, &icp->icmp_gwaddr, gw, sizeof(gw)); 625 inet_ntop(AF_INET, &icp->icmp_ip.ip_dst, 626 dst, sizeof(dst)); 627 628 printf("redirect dst %s to %s\n", dst, gw); 629 } 630 #endif 631 632 #if NCARP > 0 633 if (carp_lsdrop(ifp, m, AF_INET, &sdst.sin_addr.s_addr, 634 &ip->ip_dst.s_addr, 1)) 635 goto freeit; 636 #endif 637 rtredirect(sintosa(&sdst), sintosa(&sgw), 638 sintosa(&ssrc), &newrt, m->m_pkthdr.ph_rtableid); 639 if (newrt != NULL && icmp_redirtimeout > 0) { 640 rt_timer_add(newrt, &icmp_redirect_timeout_q, 641 m->m_pkthdr.ph_rtableid); 642 } 643 rtfree(newrt); 644 pfctlinput(PRC_REDIRECT_HOST, sintosa(&sdst)); 645 break; 646 } 647 /* 648 * No kernel processing for the following; 649 * just fall through to send to raw listener. 650 */ 651 case ICMP_ECHOREPLY: 652 case ICMP_ROUTERADVERT: 653 case ICMP_ROUTERSOLICIT: 654 case ICMP_TSTAMPREPLY: 655 case ICMP_IREQREPLY: 656 case ICMP_MASKREPLY: 657 case ICMP_TRACEROUTE: 658 case ICMP_DATACONVERR: 659 case ICMP_MOBILE_REDIRECT: 660 case ICMP_IPV6_WHEREAREYOU: 661 case ICMP_IPV6_IAMHERE: 662 case ICMP_MOBILE_REGREQUEST: 663 case ICMP_MOBILE_REGREPLY: 664 case ICMP_PHOTURIS: 665 default: 666 break; 667 } 668 669 raw: 670 return rip_input(mp, offp, proto, af); 671 672 freeit: 673 m_freem(m); 674 return IPPROTO_DONE; 675 } 676 677 /* 678 * Reflect the ip packet back to the source 679 */ 680 int 681 icmp_reflect(struct mbuf *m, struct mbuf **op, struct in_ifaddr *ia) 682 { 683 struct ip *ip = mtod(m, struct ip *); 684 struct mbuf *opts = NULL; 685 struct sockaddr_in sin; 686 struct rtentry *rt = NULL; 687 int optlen = (ip->ip_hl << 2) - sizeof(struct ip); 688 u_int rtableid; 689 u_int8_t pfflags; 690 691 if (!in_canforward(ip->ip_src) && 692 ((ip->ip_src.s_addr & IN_CLASSA_NET) != 693 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) { 694 m_freem(m); /* Bad return address */ 695 return (EHOSTUNREACH); 696 } 697 698 if (m->m_pkthdr.ph_loopcnt++ >= M_MAXLOOP) { 699 m_freem(m); 700 return (ELOOP); 701 } 702 rtableid = m->m_pkthdr.ph_rtableid; 703 pfflags = m->m_pkthdr.pf.flags; 704 m_resethdr(m); 705 m->m_pkthdr.ph_rtableid = rtableid; 706 m->m_pkthdr.pf.flags = pfflags & PF_TAG_GENERATED; 707 708 /* 709 * If the incoming packet was addressed directly to us, 710 * use dst as the src for the reply. For broadcast, use 711 * the address which corresponds to the incoming interface. 712 */ 713 if (ia == NULL) { 714 memset(&sin, 0, sizeof(sin)); 715 sin.sin_len = sizeof(sin); 716 sin.sin_family = AF_INET; 717 sin.sin_addr = ip->ip_dst; 718 719 rt = rtalloc(sintosa(&sin), 0, rtableid); 720 if (rtisvalid(rt) && 721 ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) 722 ia = ifatoia(rt->rt_ifa); 723 } 724 725 /* 726 * The following happens if the packet was not addressed to us. 727 * Use the new source address and do a route lookup. If it fails 728 * drop the packet as there is no path to the host. 729 */ 730 if (ia == NULL) { 731 rtfree(rt); 732 733 memset(&sin, 0, sizeof(sin)); 734 sin.sin_len = sizeof(sin); 735 sin.sin_family = AF_INET; 736 sin.sin_addr = ip->ip_src; 737 738 /* keep packet in the original virtual instance */ 739 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 740 if (rt == NULL) { 741 ipstat_inc(ips_noroute); 742 m_freem(m); 743 return (EHOSTUNREACH); 744 } 745 746 ia = ifatoia(rt->rt_ifa); 747 } 748 749 ip->ip_dst = ip->ip_src; 750 ip->ip_ttl = MAXTTL; 751 752 /* It is safe to dereference ``ia'' iff ``rt'' is valid. */ 753 ip->ip_src = ia->ia_addr.sin_addr; 754 rtfree(rt); 755 756 if (optlen > 0) { 757 u_char *cp; 758 int opt, cnt; 759 u_int len; 760 761 /* 762 * Retrieve any source routing from the incoming packet; 763 * add on any record-route or timestamp options. 764 */ 765 cp = (u_char *) (ip + 1); 766 if (op && (opts = ip_srcroute(m)) == NULL && 767 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) { 768 opts->m_len = sizeof(struct in_addr); 769 mtod(opts, struct in_addr *)->s_addr = 0; 770 } 771 if (op && opts) { 772 #ifdef ICMPPRINTFS 773 if (icmpprintfs) 774 printf("icmp_reflect optlen %d rt %d => ", 775 optlen, opts->m_len); 776 #endif 777 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { 778 opt = cp[IPOPT_OPTVAL]; 779 if (opt == IPOPT_EOL) 780 break; 781 if (opt == IPOPT_NOP) 782 len = 1; 783 else { 784 if (cnt < IPOPT_OLEN + sizeof(*cp)) 785 break; 786 len = cp[IPOPT_OLEN]; 787 if (len < IPOPT_OLEN + sizeof(*cp) || 788 len > cnt) 789 break; 790 } 791 /* 792 * Should check for overflow, but it 793 * "can't happen" 794 */ 795 if (opt == IPOPT_RR || opt == IPOPT_TS || 796 opt == IPOPT_SECURITY) { 797 memcpy(mtod(opts, caddr_t) + 798 opts->m_len, cp, len); 799 opts->m_len += len; 800 } 801 } 802 /* Terminate & pad, if necessary */ 803 if ((cnt = opts->m_len % 4) != 0) 804 for (; cnt < 4; cnt++) { 805 *(mtod(opts, caddr_t) + opts->m_len) = 806 IPOPT_EOL; 807 opts->m_len++; 808 } 809 #ifdef ICMPPRINTFS 810 if (icmpprintfs) 811 printf("%d\n", opts->m_len); 812 #endif 813 } 814 ip_stripoptions(m); 815 } 816 m->m_flags &= ~(M_BCAST|M_MCAST); 817 if (op) 818 *op = opts; 819 820 return (0); 821 } 822 823 /* 824 * Send an icmp packet back to the ip level 825 */ 826 void 827 icmp_send(struct mbuf *m, struct mbuf *opts) 828 { 829 struct ip *ip = mtod(m, struct ip *); 830 int hlen; 831 struct icmp *icp; 832 833 hlen = ip->ip_hl << 2; 834 icp = (struct icmp *)(mtod(m, caddr_t) + hlen); 835 icp->icmp_cksum = 0; 836 m->m_pkthdr.csum_flags = M_ICMP_CSUM_OUT; 837 #ifdef ICMPPRINTFS 838 if (icmpprintfs) { 839 char dst[INET_ADDRSTRLEN], src[INET_ADDRSTRLEN]; 840 841 inet_ntop(AF_INET, &ip->ip_dst, dst, sizeof(dst)); 842 inet_ntop(AF_INET, &ip->ip_src, src, sizeof(src)); 843 844 printf("icmp_send dst %s src %s\n", dst, src); 845 } 846 #endif 847 /* 848 * ip_send() cannot handle IP options properly. So in case we have 849 * options fill out the IP header here and use ip_send_raw() instead. 850 */ 851 if (opts != NULL) { 852 m = ip_insertoptions(m, opts, &hlen); 853 ip = mtod(m, struct ip *); 854 ip->ip_hl = (hlen >> 2); 855 ip->ip_v = IPVERSION; 856 ip->ip_off &= htons(IP_DF); 857 ip->ip_id = htons(ip_randomid()); 858 ipstat_inc(ips_localout); 859 ip_send_raw(m); 860 } else 861 ip_send(m); 862 } 863 864 u_int32_t 865 iptime(void) 866 { 867 struct timeval atv; 868 u_long t; 869 870 microtime(&atv); 871 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; 872 return (htonl(t)); 873 } 874 875 int 876 icmp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 877 size_t newlen) 878 { 879 int error; 880 881 /* All sysctl names at this level are terminal. */ 882 if (namelen != 1) 883 return (ENOTDIR); 884 885 switch (name[0]) { 886 case ICMPCTL_REDIRTIMEOUT: 887 NET_LOCK(); 888 error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, 889 &icmp_redirtimeout, 0, INT_MAX); 890 rt_timer_queue_change(&icmp_redirect_timeout_q, 891 icmp_redirtimeout); 892 NET_UNLOCK(); 893 break; 894 895 case ICMPCTL_STATS: 896 error = icmp_sysctl_icmpstat(oldp, oldlenp, newp); 897 break; 898 899 default: 900 NET_LOCK(); 901 error = sysctl_bounded_arr(icmpctl_vars, nitems(icmpctl_vars), 902 name, namelen, oldp, oldlenp, newp, newlen); 903 NET_UNLOCK(); 904 break; 905 } 906 907 return (error); 908 } 909 910 int 911 icmp_sysctl_icmpstat(void *oldp, size_t *oldlenp, void *newp) 912 { 913 uint64_t counters[icps_ncounters]; 914 struct icmpstat icmpstat; 915 u_long *words = (u_long *)&icmpstat; 916 int i; 917 918 CTASSERT(sizeof(icmpstat) == (nitems(counters) * sizeof(u_long))); 919 memset(&icmpstat, 0, sizeof icmpstat); 920 counters_read(icmpcounters, counters, nitems(counters), NULL); 921 922 for (i = 0; i < nitems(counters); i++) 923 words[i] = (u_long)counters[i]; 924 925 return (sysctl_rdstruct(oldp, oldlenp, newp, 926 &icmpstat, sizeof(icmpstat))); 927 } 928 929 struct rtentry * 930 icmp_mtudisc_clone(struct in_addr dst, u_int rtableid, int ipsec) 931 { 932 struct sockaddr_in sin; 933 struct rtentry *rt; 934 int error; 935 936 memset(&sin, 0, sizeof(sin)); 937 sin.sin_family = AF_INET; 938 sin.sin_len = sizeof(sin); 939 sin.sin_addr = dst; 940 941 rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); 942 943 /* Check if the route is actually usable */ 944 if (!rtisvalid(rt)) 945 goto bad; 946 /* IPsec needs the route only for PMTU, it can use reject for that */ 947 if (!ipsec && (rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE))) 948 goto bad; 949 950 /* 951 * No PMTU for local routes and permanent neighbors, 952 * ARP and NDP use the same expire timer as the route. 953 */ 954 if (ISSET(rt->rt_flags, RTF_LOCAL) || 955 (ISSET(rt->rt_flags, RTF_LLINFO) && rt->rt_expire == 0)) 956 goto bad; 957 958 /* If we didn't get a host route, allocate one */ 959 if ((rt->rt_flags & RTF_HOST) == 0) { 960 struct rtentry *nrt; 961 struct rt_addrinfo info; 962 struct sockaddr_rtlabel sa_rl; 963 964 memset(&info, 0, sizeof(info)); 965 info.rti_ifa = rt->rt_ifa; 966 info.rti_flags = RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC; 967 info.rti_info[RTAX_DST] = sintosa(&sin); 968 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 969 info.rti_info[RTAX_LABEL] = 970 rtlabel_id2sa(rt->rt_labelid, &sa_rl); 971 972 error = rtrequest(RTM_ADD, &info, rt->rt_priority, &nrt, 973 rtableid); 974 if (error) 975 goto bad; 976 nrt->rt_rmx = rt->rt_rmx; 977 rtfree(rt); 978 rt = nrt; 979 rtm_send(rt, RTM_ADD, 0, rtableid); 980 } 981 error = rt_timer_add(rt, &ip_mtudisc_timeout_q, rtableid); 982 if (error) 983 goto bad; 984 985 return (rt); 986 bad: 987 rtfree(rt); 988 return (NULL); 989 } 990 991 /* Table of common MTUs: */ 992 static const u_short mtu_table[] = { 993 65535, 65280, 32000, 17914, 9180, 8166, 994 4352, 2002, 1492, 1006, 508, 296, 68, 0 995 }; 996 997 void 998 icmp_mtudisc(struct icmp *icp, u_int rtableid) 999 { 1000 struct rtentry *rt; 1001 struct ifnet *ifp; 1002 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */ 1003 1004 rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid, 0); 1005 if (rt == NULL) 1006 return; 1007 1008 ifp = if_get(rt->rt_ifidx); 1009 if (ifp == NULL) { 1010 rtfree(rt); 1011 return; 1012 } 1013 1014 if (mtu == 0) { 1015 int i = 0; 1016 1017 mtu = ntohs(icp->icmp_ip.ip_len); 1018 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */ 1019 if (mtu > rt->rt_mtu && rt->rt_mtu != 0) 1020 mtu -= (icp->icmp_ip.ip_hl << 2); 1021 1022 /* If we still can't guess a value, try the route */ 1023 if (mtu == 0) { 1024 mtu = rt->rt_mtu; 1025 1026 /* If no route mtu, default to the interface mtu */ 1027 1028 if (mtu == 0) 1029 mtu = ifp->if_mtu; 1030 } 1031 1032 for (i = 0; i < nitems(mtu_table); i++) 1033 if (mtu > mtu_table[i]) { 1034 mtu = mtu_table[i]; 1035 break; 1036 } 1037 } 1038 1039 /* 1040 * XXX: RTV_MTU is overloaded, since the admin can set it 1041 * to turn off PMTU for a route, and the kernel can 1042 * set it to indicate a serious problem with PMTU 1043 * on a route. We should be using a separate flag 1044 * for the kernel to indicate this. 1045 */ 1046 if ((rt->rt_locks & RTV_MTU) == 0) { 1047 if (mtu < 296 || mtu > ifp->if_mtu) 1048 rt->rt_locks |= RTV_MTU; 1049 else if (rt->rt_mtu > mtu || rt->rt_mtu == 0) 1050 rt->rt_mtu = mtu; 1051 } 1052 1053 if_put(ifp); 1054 rtfree(rt); 1055 } 1056 1057 void 1058 icmp_mtudisc_timeout(struct rtentry *rt, u_int rtableid) 1059 { 1060 struct ifnet *ifp; 1061 1062 NET_ASSERT_LOCKED(); 1063 1064 ifp = if_get(rt->rt_ifidx); 1065 if (ifp == NULL) 1066 return; 1067 1068 if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) { 1069 void (*ctlfunc)(int, struct sockaddr *, u_int, void *); 1070 struct sockaddr_in sin; 1071 1072 sin = *satosin(rt_key(rt)); 1073 1074 rtdeletemsg(rt, ifp, rtableid); 1075 1076 /* Notify TCP layer of increased Path MTU estimate */ 1077 ctlfunc = inetsw[ip_protox[IPPROTO_TCP]].pr_ctlinput; 1078 if (ctlfunc) 1079 (*ctlfunc)(PRC_MTUINC, sintosa(&sin), 1080 rtableid, NULL); 1081 } else { 1082 if ((rt->rt_locks & RTV_MTU) == 0) 1083 rt->rt_mtu = 0; 1084 } 1085 1086 if_put(ifp); 1087 } 1088 1089 /* 1090 * Perform rate limit check. 1091 * Returns 0 if it is okay to send the icmp packet. 1092 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate 1093 * limitation. 1094 * 1095 * XXX per-destination/type check necessary? 1096 */ 1097 int 1098 icmp_ratelimit(const struct in_addr *dst, const int type, const int code) 1099 { 1100 /* PPS limit */ 1101 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count, 1102 icmperrppslim)) 1103 return 1; /* The packet is subject to rate limit */ 1104 return 0; /* okay to send */ 1105 } 1106 1107 int 1108 icmp_do_exthdr(struct mbuf *m, u_int16_t class, u_int8_t ctype, void *buf, 1109 size_t len) 1110 { 1111 struct ip *ip = mtod(m, struct ip *); 1112 int hlen, off; 1113 struct mbuf *n; 1114 struct icmp *icp; 1115 struct icmp_ext_hdr *ieh; 1116 struct { 1117 struct icmp_ext_hdr ieh; 1118 struct icmp_ext_obj_hdr ieo; 1119 } hdr; 1120 1121 hlen = ip->ip_hl << 2; 1122 icp = (struct icmp *)(mtod(m, caddr_t) + hlen); 1123 if (icp->icmp_type != ICMP_TIMXCEED && icp->icmp_type != ICMP_UNREACH && 1124 icp->icmp_type != ICMP_PARAMPROB) 1125 /* exthdr not supported */ 1126 return (0); 1127 1128 if (icp->icmp_length != 0) 1129 /* exthdr already present, giving up */ 1130 return (0); 1131 1132 /* the actual offset starts after the common ICMP header */ 1133 hlen += ICMP_MINLEN; 1134 /* exthdr must start on a word boundary */ 1135 off = roundup(ntohs(ip->ip_len) - hlen, sizeof(u_int32_t)); 1136 /* ... and at an offset of ICMP_EXT_OFFSET or bigger */ 1137 off = max(off, ICMP_EXT_OFFSET); 1138 icp->icmp_length = off / sizeof(u_int32_t); 1139 1140 memset(&hdr, 0, sizeof(hdr)); 1141 hdr.ieh.ieh_version = ICMP_EXT_HDR_VERSION; 1142 hdr.ieo.ieo_length = htons(sizeof(struct icmp_ext_obj_hdr) + len); 1143 hdr.ieo.ieo_cnum = class; 1144 hdr.ieo.ieo_ctype = ctype; 1145 1146 if (m_copyback(m, hlen + off, sizeof(hdr), &hdr, M_NOWAIT) || 1147 m_copyback(m, hlen + off + sizeof(hdr), len, buf, M_NOWAIT)) { 1148 m_freem(m); 1149 return (ENOBUFS); 1150 } 1151 1152 /* calculate checksum */ 1153 n = m_getptr(m, hlen + off, &off); 1154 if (n == NULL) 1155 panic("icmp_do_exthdr: m_getptr failure"); 1156 ieh = (struct icmp_ext_hdr *)(mtod(n, caddr_t) + off); 1157 ieh->ieh_cksum = in4_cksum(n, 0, off, sizeof(hdr) + len); 1158 1159 ip->ip_len = htons(m->m_pkthdr.len); 1160 1161 return (0); 1162 } 1163