/*
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
28 * 29 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 30 * $FreeBSD: src/sys/netinet/ip_icmp.c,v 1.39.2.19 2003/01/24 05:11:34 sam Exp $ 31 */ 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> /* for M_NOWAIT */ 36 #include <sys/mbuf.h> 37 #include <sys/protosw.h> 38 #include <sys/socket.h> 39 #include <sys/socketops.h> 40 #include <sys/time.h> 41 #include <sys/kernel.h> 42 #include <sys/sysctl.h> 43 #include <sys/in_cksum.h> 44 45 #include <machine/stdarg.h> 46 47 #include <net/if.h> 48 #include <net/if_types.h> 49 #include <net/netisr2.h> 50 #include <net/netmsg2.h> 51 #include <net/route.h> 52 53 #define _IP_VHL 54 #include <netinet/in.h> 55 #include <netinet/in_systm.h> 56 #include <netinet/in_var.h> 57 #include <netinet/ip.h> 58 #include <netinet/ip_icmp.h> 59 #include <netinet/ip_var.h> 60 #include <netinet/icmp_var.h> 61 62 /* 63 * ICMP routines: error generation, receive packet processing, and 64 * routines to turnaround packets back to the originator, and 65 * host table maintenance routines. 66 */ 67 68 struct icmpstat icmpstat; 69 SYSCTL_STRUCT(_net_inet_icmp, ICMPCTL_STATS, stats, CTLFLAG_RW, 70 &icmpstat, icmpstat, "ICMP statistics"); 71 72 static int icmpmaskrepl = 0; 73 SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_RW, 74 &icmpmaskrepl, 0, "Allow replies to netmask requests"); 75 76 static int drop_redirect = 0; 77 SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, 78 &drop_redirect, 0, "Ignore ICMP redirects"); 79 80 static int log_redirect = 0; 81 SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_RW, 82 &log_redirect, 0, "Enable output about ICMP redirects"); 83 84 static int discard_sourcequench = 1; 85 SYSCTL_INT(_net_inet_icmp, OID_AUTO, discard_sourcequench, CTLFLAG_RW, 86 &discard_sourcequench, 0, "Discard ICMP Source Quench"); 87 88 #ifdef ICMP_BANDLIM 89 90 /* 91 * ICMP error-response bandwidth limiting sysctl. If not enabled, sysctl 92 * variable content is -1 and read-only. 
93 */ 94 95 static int icmplim = 200; 96 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RW, 97 &icmplim, 0, "ICMP bandwidth limit"); 98 #else 99 100 static int icmplim = -1; 101 SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_RD, 102 &icmplim, 0, "ICMP bandwidth limit"); 103 104 #endif 105 106 static int icmplim_output = 0; 107 SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_RW, 108 &icmplim_output, 0, "Enable output about ICMP bandwidth limits"); 109 110 /* 111 * ICMP broadcast echo sysctl 112 */ 113 114 static int icmpbmcastecho = 0; 115 SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_RW, 116 &icmpbmcastecho, 0, ""); 117 118 static char icmp_reply_src[IFNAMSIZ]; 119 SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_RW, 120 icmp_reply_src, IFNAMSIZ, "icmp reply source for non-local packets."); 121 122 static int icmp_rfi; 123 SYSCTL_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_RW, 124 &icmp_rfi, 0, "ICMP reply from incoming interface for " 125 "non-local packets"); 126 127 #ifdef ICMPPRINTFS 128 static int icmpprintfs = 0; 129 SYSCTL_INT(_net_inet_icmp, OID_AUTO, debug_prints, CTLFLAG_RW, 130 &icmpprintfs, 0, "extra ICMP debug prints"); 131 #endif 132 133 static void icmp_reflect (struct mbuf *); 134 static void icmp_send (struct mbuf *, struct mbuf *, struct route *); 135 136 extern struct protosw inetsw[]; 137 138 /* 139 * Generate an error packet of type error 140 * in response to bad packet ip. 
141 */ 142 void 143 icmp_error(struct mbuf *n, int type, int code, n_long dest, int destmtu) 144 { 145 struct ip *oip = mtod(n, struct ip *), *nip; 146 unsigned oiplen = IP_VHL_HL(oip->ip_vhl) << 2; 147 struct icmp *icp; 148 struct mbuf *m; 149 unsigned icmplen; 150 151 #ifdef ICMPPRINTFS 152 if (icmpprintfs) 153 kprintf("icmp_error(%p, %d, %d)\n", oip, type, code); 154 #endif 155 if (type != ICMP_REDIRECT) 156 icmpstat.icps_error++; 157 /* 158 * Don't send error if the original packet was encrypted. 159 * Don't send error if not the first fragment of message. 160 * Don't error if the old packet protocol was ICMP 161 * error message, only known informational types. 162 */ 163 if (n->m_flags & M_DECRYPTED) 164 goto freeit; 165 if (oip->ip_off &~ (IP_MF|IP_DF)) 166 goto freeit; 167 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && 168 n->m_len >= oiplen + ICMP_MINLEN && 169 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) { 170 icmpstat.icps_oldicmp++; 171 goto freeit; 172 } 173 /* Don't send error in response to a multicast or broadcast packet */ 174 if (n->m_flags & (M_BCAST|M_MCAST)) 175 goto freeit; 176 /* 177 * First, formulate icmp message 178 */ 179 m = m_gethdr(M_NOWAIT, MT_HEADER); 180 if (m == NULL) 181 goto freeit; 182 icmplen = min(oiplen + 8, oip->ip_len); 183 if (icmplen < sizeof(struct ip)) 184 panic("icmp_error: bad length"); 185 m->m_len = icmplen + ICMP_MINLEN; 186 MH_ALIGN(m, m->m_len); 187 icp = mtod(m, struct icmp *); 188 if ((u_int)type > ICMP_MAXTYPE) 189 panic("icmp_error"); 190 icmpstat.icps_outhist[type]++; 191 icp->icmp_type = type; 192 if (type == ICMP_REDIRECT) 193 icp->icmp_gwaddr.s_addr = dest; 194 else { 195 icp->icmp_void = 0; 196 /* 197 * The following assignments assume an overlay with the 198 * zeroed icmp_void field. 
199 */ 200 if (type == ICMP_PARAMPROB) { 201 icp->icmp_pptr = code; 202 code = 0; 203 } else if (type == ICMP_UNREACH && 204 code == ICMP_UNREACH_NEEDFRAG && destmtu) { 205 icp->icmp_nextmtu = htons(destmtu); 206 } 207 } 208 209 icp->icmp_code = code; 210 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip); 211 nip = &icp->icmp_ip; 212 213 /* 214 * Convert fields to network representation. 215 */ 216 nip->ip_len = htons(nip->ip_len); 217 nip->ip_off = htons(nip->ip_off); 218 219 /* 220 * Now, copy old ip header (without options) 221 * in front of icmp message. 222 */ 223 if (m->m_data - sizeof(struct ip) < m->m_pktdat) 224 panic("icmp len"); 225 m->m_data -= sizeof(struct ip); 226 m->m_len += sizeof(struct ip); 227 m->m_pkthdr.len = m->m_len; 228 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif; 229 nip = mtod(m, struct ip *); 230 bcopy(oip, nip, sizeof(struct ip)); 231 nip->ip_len = m->m_len; 232 nip->ip_vhl = IP_VHL_BORING; 233 nip->ip_p = IPPROTO_ICMP; 234 nip->ip_tos = 0; 235 m->m_pkthdr.fw_flags |= n->m_pkthdr.fw_flags & FW_MBUF_GENERATED; 236 icmp_reflect(m); 237 238 freeit: 239 m_freem(n); 240 } 241 242 static void 243 icmp_ctlinput_done_handler(netmsg_t nmsg) 244 { 245 struct netmsg_ctlinput *msg = (struct netmsg_ctlinput *)nmsg; 246 struct mbuf *m = msg->m; 247 int hlen = msg->hlen; 248 249 rip_input(&m, &hlen, msg->proto); 250 } 251 252 static void 253 icmp_ctlinput_done(struct mbuf *m) 254 { 255 struct netmsg_ctlinput *msg = &m->m_hdr.mh_ctlmsg; 256 257 netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0, 258 icmp_ctlinput_done_handler); 259 lwkt_sendmsg(netisr_cpuport(0), &msg->base.lmsg); 260 } 261 262 static void 263 icmp_mtudisc(struct mbuf *m, int hlen) 264 { 265 struct sockaddr_in icmpsrc = { sizeof(struct sockaddr_in), AF_INET }; 266 struct rtentry *rt; 267 struct icmp *icp; 268 269 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 270 271 icp = mtodoff(m, struct icmp *, hlen); 272 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 273 274 /* 275 * MTU 
discovery: 276 * If we got a needfrag and there is a host route to the original 277 * destination, and the MTU is not locked, then set the MTU in the 278 * route to the suggested new value (if given) and then notify as 279 * usual. The ULPs will notice that the MTU has changed and adapt 280 * accordingly. If no new MTU was suggested, then we guess a new 281 * one less than the current value. If the new MTU is unreasonably 282 * small (arbitrarily set at 296), then we reset the MTU to the 283 * interface value and enable the lock bit, indicating that we are 284 * no longer doing MTU discovery. 285 */ 286 rt = rtpurelookup((struct sockaddr *)&icmpsrc); 287 if (rt != NULL && (rt->rt_flags & RTF_HOST) && 288 !(rt->rt_rmx.rmx_locks & RTV_MTU)) { 289 #ifdef DEBUG_MTUDISC 290 char src_buf[INET_ADDRSTRLEN]; 291 #endif 292 int mtu; 293 294 mtu = ntohs(icp->icmp_nextmtu); 295 if (!mtu) 296 mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu, 1); 297 #ifdef DEBUG_MTUDISC 298 kprintf("MTU for %s reduced to %d\n", 299 inet_ntop(AF_INET, &icmpsrc.sin_addr, 300 src_buf, INET_ADDRSTRLEN), mtu); 301 #endif 302 if (mtu < 296) { 303 /* rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; */ 304 rt->rt_rmx.rmx_locks |= RTV_MTU; 305 } else if (rt->rt_rmx.rmx_mtu > mtu) { 306 rt->rt_rmx.rmx_mtu = mtu; 307 } 308 } 309 if (rt != NULL) 310 --rt->rt_refcnt; 311 312 /* 313 * XXX if the packet contains [IPv4 AH TCP], we can't make a 314 * notification to TCP layer. 
315 */ 316 so_pr_ctlinput_direct(&inetsw[ip_protox[icp->icmp_ip.ip_p]], 317 PRC_MSGSIZE, (struct sockaddr *)&icmpsrc, &icp->icmp_ip); 318 } 319 320 static void 321 icmp_mtudisc_handler(netmsg_t nmsg) 322 { 323 struct netmsg_ctlinput *msg = (struct netmsg_ctlinput *)nmsg; 324 int nextcpu; 325 326 ASSERT_NETISR_NCPUS(mycpuid); 327 328 icmp_mtudisc(msg->m, msg->hlen); 329 330 nextcpu = mycpuid + 1; 331 if (nextcpu < netisr_ncpus) 332 lwkt_forwardmsg(netisr_cpuport(nextcpu), &msg->base.lmsg); 333 else 334 icmp_ctlinput_done(msg->m); 335 } 336 337 static boolean_t 338 icmp_mtudisc_start(struct mbuf *m, int hlen, int proto) 339 { 340 struct netmsg_ctlinput *msg; 341 342 ASSERT_NETISR0; 343 344 icmp_mtudisc(m, hlen); 345 346 if (netisr_ncpus == 1) { 347 /* There is only one netisr; done */ 348 return FALSE; 349 } 350 351 msg = &m->m_hdr.mh_ctlmsg; 352 netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0, 353 icmp_mtudisc_handler); 354 msg->m = m; 355 msg->cmd = PRC_MSGSIZE; 356 msg->hlen = hlen; 357 msg->proto = proto; 358 359 lwkt_sendmsg(netisr_cpuport(1), &msg->base.lmsg); 360 return TRUE; 361 } 362 363 static void 364 icmp_ctlinput(struct mbuf *m, int cmd, int hlen) 365 { 366 struct sockaddr_in icmpsrc = { sizeof(struct sockaddr_in), AF_INET }; 367 struct icmp *icp; 368 369 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 370 371 icp = mtodoff(m, struct icmp *, hlen); 372 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 373 374 /* 375 * XXX if the packet contains [IPv4 AH TCP], we can't make a 376 * notification to TCP layer. 
377 */ 378 so_pr_ctlinput_direct(&inetsw[ip_protox[icp->icmp_ip.ip_p]], 379 cmd, (struct sockaddr *)&icmpsrc, &icp->icmp_ip); 380 } 381 382 static void 383 icmp_ctlinput_handler(netmsg_t nmsg) 384 { 385 struct netmsg_ctlinput *msg = (struct netmsg_ctlinput *)nmsg; 386 387 ASSERT_NETISR_NCPUS(mycpuid); 388 389 icmp_ctlinput(msg->m, msg->cmd, msg->hlen); 390 icmp_ctlinput_done(msg->m); 391 } 392 393 static void 394 icmp_ctlinput_start(struct mbuf *m, struct lwkt_port *port, 395 int cmd, int hlen, int proto) 396 { 397 struct netmsg_ctlinput *msg; 398 399 KASSERT(&curthread->td_msgport != port, 400 ("send icmp ctlinput to the current netisr")); 401 402 msg = &m->m_hdr.mh_ctlmsg; 403 netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0, 404 icmp_ctlinput_handler); 405 msg->m = m; 406 msg->cmd = cmd; 407 msg->hlen = hlen; 408 msg->proto = proto; 409 410 lwkt_sendmsg(port, &msg->base.lmsg); 411 } 412 413 static void 414 icmp_ctlinput_global_handler(netmsg_t nmsg) 415 { 416 struct netmsg_ctlinput *msg = (struct netmsg_ctlinput *)nmsg; 417 int nextcpu; 418 419 ASSERT_NETISR_NCPUS(mycpuid); 420 421 icmp_ctlinput(msg->m, msg->cmd, msg->hlen); 422 423 nextcpu = mycpuid + 1; 424 if (nextcpu < netisr_ncpus) 425 lwkt_forwardmsg(netisr_cpuport(nextcpu), &msg->base.lmsg); 426 else 427 icmp_ctlinput_done(msg->m); 428 } 429 430 static void 431 icmp_ctlinput_global_start(struct mbuf *m, int cmd, int hlen, int proto) 432 { 433 struct netmsg_ctlinput *msg; 434 435 ASSERT_NETISR0; 436 KASSERT(netisr_ncpus > 1, ("there is only 1 netisr cpu")); 437 438 icmp_ctlinput(m, cmd, hlen); 439 440 msg = &m->m_hdr.mh_ctlmsg; 441 netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0, 442 icmp_ctlinput_global_handler); 443 msg->m = m; 444 msg->cmd = cmd; 445 msg->hlen = hlen; 446 msg->proto = proto; 447 448 lwkt_sendmsg(netisr_cpuport(1), &msg->base.lmsg); 449 } 450 451 #define ICMP_RTREDIRECT_FLAGS (RTF_GATEWAY | RTF_HOST) 452 453 static void 454 icmp_redirect(struct mbuf *m, int hlen, boolean_t 
prt) 455 { 456 struct sockaddr_in icmpsrc = { sizeof(struct sockaddr_in), AF_INET }; 457 struct sockaddr_in icmpdst = { sizeof(struct sockaddr_in), AF_INET }; 458 struct sockaddr_in icmpgw = { sizeof(struct sockaddr_in), AF_INET }; 459 struct icmp *icp; 460 struct ip *ip; 461 462 KASSERT(curthread->td_type == TD_TYPE_NETISR, ("not in netisr")); 463 464 ip = mtod(m, struct ip *); 465 icp = mtodoff(m, struct icmp *, hlen); 466 467 /* 468 * Short circuit routing redirects to force immediate change 469 * in the kernel's routing tables. The message is also handed 470 * to anyone listening on a raw socket (e.g. the routing daemon 471 * for use in updating its tables). 472 */ 473 #ifdef ICMPPRINTFS 474 if (icmpprintfs && prt) { 475 char dst_buf[INET_ADDRSTRLEN], gw_buf[INET_ADDRSTRLEN]; 476 477 kprintf("redirect dst %s to %s\n", 478 inet_ntop(AF_INET, &icp->icmp_ip.ip_dst, 479 dst_buf, INET_ADDRSTRLEN), 480 inet_ntop(AF_INET, &icp->icmp_gwaddr, 481 gw_buf, INET_ADDRSTRLEN)); 482 } 483 #endif 484 icmpgw.sin_addr = ip->ip_src; 485 icmpdst.sin_addr = icp->icmp_gwaddr; 486 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 487 rtredirect_oncpu((struct sockaddr *)&icmpsrc, 488 (struct sockaddr *)&icmpdst, NULL, ICMP_RTREDIRECT_FLAGS, 489 (struct sockaddr *)&icmpgw); 490 kpfctlinput_direct(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc); 491 } 492 493 static void 494 icmp_redirect_done_handler(netmsg_t nmsg) 495 { 496 struct netmsg_ctlinput *msg = (struct netmsg_ctlinput *)nmsg; 497 struct mbuf *m = msg->m; 498 int hlen = msg->hlen; 499 500 rip_input(&m, &hlen, msg->proto); 501 } 502 503 static void 504 icmp_redirect_done(struct mbuf *m, int hlen, boolean_t dispatch_rip) 505 { 506 struct rt_addrinfo rtinfo; 507 struct sockaddr_in icmpsrc = { sizeof(struct sockaddr_in), AF_INET }; 508 struct sockaddr_in icmpdst = { sizeof(struct sockaddr_in), AF_INET }; 509 struct sockaddr_in icmpgw = { sizeof(struct sockaddr_in), AF_INET }; 510 struct icmp *icp; 511 struct ip *ip; 512 513 ip = mtod(m, 
struct ip *); 514 icp = mtodoff(m, struct icmp *, hlen); 515 516 icmpgw.sin_addr = ip->ip_src; 517 icmpdst.sin_addr = icp->icmp_gwaddr; 518 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 519 520 bzero(&rtinfo, sizeof(struct rt_addrinfo)); 521 rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&icmpsrc; 522 rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&icmpdst; 523 rtinfo.rti_info[RTAX_NETMASK] = NULL; 524 rtinfo.rti_info[RTAX_AUTHOR] = (struct sockaddr *)&icmpgw; 525 rt_missmsg(RTM_REDIRECT, &rtinfo, ICMP_RTREDIRECT_FLAGS, 0); 526 527 if (dispatch_rip) { 528 struct netmsg_ctlinput *msg = &m->m_hdr.mh_ctlmsg; 529 530 netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0, 531 icmp_redirect_done_handler); 532 lwkt_sendmsg(netisr_cpuport(0), &msg->base.lmsg); 533 } 534 } 535 536 static void 537 icmp_redirect_handler(netmsg_t nmsg) 538 { 539 struct netmsg_ctlinput *msg = (struct netmsg_ctlinput *)nmsg; 540 int nextcpu; 541 542 ASSERT_NETISR_NCPUS(mycpuid); 543 544 icmp_redirect(msg->m, msg->hlen, FALSE); 545 546 nextcpu = mycpuid + 1; 547 if (nextcpu < netisr_ncpus) 548 lwkt_forwardmsg(netisr_cpuport(nextcpu), &msg->base.lmsg); 549 else 550 icmp_redirect_done(msg->m, msg->hlen, TRUE); 551 } 552 553 static boolean_t 554 icmp_redirect_start(struct mbuf *m, int hlen, int proto) 555 { 556 struct netmsg_ctlinput *msg; 557 558 ASSERT_NETISR0; 559 560 icmp_redirect(m, hlen, TRUE); 561 562 if (netisr_ncpus == 1) { 563 /* There is only one netisr; done */ 564 icmp_redirect_done(m, hlen, FALSE); 565 return FALSE; 566 } 567 568 msg = &m->m_hdr.mh_ctlmsg; 569 netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0, 570 icmp_redirect_handler); 571 msg->m = m; 572 msg->cmd = PRC_REDIRECT_HOST; 573 msg->hlen = hlen; 574 msg->proto = proto; 575 576 lwkt_sendmsg(netisr_cpuport(1), &msg->base.lmsg); 577 return TRUE; 578 } 579 580 /* 581 * Process a received ICMP message. 
582 */ 583 int 584 icmp_input(struct mbuf **mp, int *offp, int proto) 585 { 586 struct sockaddr_in icmpsrc = { sizeof(struct sockaddr_in), AF_INET }; 587 struct sockaddr_in icmpdst = { sizeof(struct sockaddr_in), AF_INET }; 588 struct icmp *icp; 589 struct in_ifaddr *ia; 590 struct mbuf *m = *mp; 591 struct ip *ip = mtod(m, struct ip *); 592 int icmplen = ip->ip_len; 593 int i, hlen; 594 int code; 595 596 ASSERT_NETISR0; 597 598 *mp = NULL; 599 hlen = *offp; 600 601 /* 602 * Locate icmp structure in mbuf, and check 603 * that not corrupted and of at least minimum length. 604 */ 605 #ifdef ICMPPRINTFS 606 if (icmpprintfs) { 607 char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN]; 608 609 kprintf("icmp_input from %s to %s, len %d\n", 610 inet_ntop(AF_INET, &ip->ip_src, src_buf, INET_ADDRSTRLEN), 611 inet_ntop(AF_INET, &ip->ip_dst, dst_buf, INET_ADDRSTRLEN), 612 icmplen); 613 } 614 #endif 615 if (icmplen < ICMP_MINLEN) { 616 icmpstat.icps_tooshort++; 617 goto freeit; 618 } 619 i = hlen + min(icmplen, ICMP_ADVLENMIN); 620 if (m->m_len < i && (m = m_pullup(m, i)) == NULL) { 621 icmpstat.icps_tooshort++; 622 return(IPPROTO_DONE); 623 } 624 ip = mtod(m, struct ip *); 625 626 if (in_cksum_skip(m, hlen + icmplen, hlen)) { 627 icmpstat.icps_checksum++; 628 goto freeit; 629 } 630 icp = (struct icmp *)((caddr_t)ip + hlen); 631 632 #ifdef ICMPPRINTFS 633 if (icmpprintfs) 634 kprintf("icmp_input, type %d code %d\n", icp->icmp_type, 635 icp->icmp_code); 636 #endif 637 638 /* 639 * Message type specific processing. 
640 */ 641 if (icp->icmp_type > ICMP_MAXTYPE) 642 goto raw; 643 icmpstat.icps_inhist[icp->icmp_type]++; 644 code = icp->icmp_code; 645 switch (icp->icmp_type) { 646 647 case ICMP_UNREACH: 648 switch (code) { 649 case ICMP_UNREACH_NET: 650 case ICMP_UNREACH_HOST: 651 case ICMP_UNREACH_SRCFAIL: 652 case ICMP_UNREACH_NET_UNKNOWN: 653 case ICMP_UNREACH_HOST_UNKNOWN: 654 case ICMP_UNREACH_ISOLATED: 655 case ICMP_UNREACH_TOSNET: 656 case ICMP_UNREACH_TOSHOST: 657 case ICMP_UNREACH_HOST_PRECEDENCE: 658 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 659 code = PRC_UNREACH_NET; 660 break; 661 662 case ICMP_UNREACH_NEEDFRAG: 663 code = PRC_MSGSIZE; 664 break; 665 666 /* 667 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9. 668 * Treat subcodes 2,3 as immediate RST 669 */ 670 case ICMP_UNREACH_PROTOCOL: 671 case ICMP_UNREACH_PORT: 672 code = PRC_UNREACH_PORT; 673 break; 674 675 case ICMP_UNREACH_NET_PROHIB: 676 case ICMP_UNREACH_HOST_PROHIB: 677 case ICMP_UNREACH_FILTER_PROHIB: 678 code = PRC_UNREACH_ADMIN_PROHIB; 679 break; 680 681 default: 682 goto badcode; 683 } 684 goto deliver; 685 686 case ICMP_TIMXCEED: 687 if (code > 1) 688 goto badcode; 689 code += PRC_TIMXCEED_INTRANS; 690 goto deliver; 691 692 case ICMP_PARAMPROB: 693 if (code > 1) 694 goto badcode; 695 code = PRC_PARAMPROB; 696 goto deliver; 697 698 case ICMP_SOURCEQUENCH: 699 if (code) 700 goto badcode; 701 if (discard_sourcequench) 702 break; 703 code = PRC_QUENCH; 704 deliver: 705 /* 706 * Problem with datagram; advise higher level routines. 
707 */ 708 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 709 IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) { 710 icmpstat.icps_badlen++; 711 goto freeit; 712 } 713 /* Discard ICMP's in response to multicast packets */ 714 if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr))) 715 goto badcode; 716 #ifdef ICMPPRINTFS 717 if (icmpprintfs) 718 kprintf("deliver to protocol %d\n", icp->icmp_ip.ip_p); 719 #endif 720 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 721 722 /* 723 * MTU discovery 724 */ 725 if (code == PRC_MSGSIZE) { 726 /* Run MTU discovery in all netisrs */ 727 if (icmp_mtudisc_start(m, hlen, proto)) { 728 /* Forwarded; done */ 729 return IPPROTO_DONE; 730 } 731 /* Move on; run rip_input() directly */ 732 } else { 733 struct protosw *pr; 734 struct lwkt_port *port; 735 int cpu; 736 737 pr = &inetsw[ip_protox[icp->icmp_ip.ip_p]]; 738 port = so_pr_ctlport(pr, code, 739 (struct sockaddr *)&icmpsrc, &icp->icmp_ip, &cpu); 740 if (port != NULL) { 741 if (cpu == netisr_ncpus) { 742 if (netisr_ncpus > 1) { 743 /* 744 * Run pr_ctlinput in all 745 * netisrs 746 */ 747 icmp_ctlinput_global_start(m, 748 code, hlen, proto); 749 return IPPROTO_DONE; 750 } 751 /* 752 * There is only one netisr; run 753 * pr_ctlinput directly. 754 */ 755 } else if (cpu != mycpuid) { 756 /* 757 * Send to the target netisr to run 758 * pr_ctlinput. 759 */ 760 icmp_ctlinput_start(m, port, 761 code, hlen, proto); 762 return IPPROTO_DONE; 763 } 764 765 /* 766 * The target netisr is this netisr. 767 * 768 * XXX if the packet contains [IPv4 AH TCP], 769 * we can't make a notification to TCP layer. 
770 */ 771 so_pr_ctlinput_direct(pr, code, 772 (struct sockaddr *)&icmpsrc, &icp->icmp_ip); 773 } 774 /* Move on; run rip_input() directly */ 775 } 776 break; 777 badcode: 778 icmpstat.icps_badcode++; 779 break; 780 781 case ICMP_ECHO: 782 if (!icmpbmcastecho 783 && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 784 icmpstat.icps_bmcastecho++; 785 break; 786 } 787 icp->icmp_type = ICMP_ECHOREPLY; 788 #ifdef ICMP_BANDLIM 789 if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0) 790 goto freeit; 791 else 792 #endif 793 goto reflect; 794 795 case ICMP_TSTAMP: 796 if (!icmpbmcastecho 797 && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { 798 icmpstat.icps_bmcasttstamp++; 799 break; 800 } 801 if (icmplen < ICMP_TSLEN) { 802 icmpstat.icps_badlen++; 803 break; 804 } 805 icp->icmp_type = ICMP_TSTAMPREPLY; 806 icp->icmp_rtime = iptime(); 807 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ 808 #ifdef ICMP_BANDLIM 809 if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0) 810 goto freeit; 811 else 812 #endif 813 goto reflect; 814 815 case ICMP_MASKREQ: 816 if (icmpmaskrepl == 0) 817 break; 818 /* 819 * We are not able to respond with all ones broadcast 820 * unless we receive it over a point-to-point interface. 
821 */ 822 if (icmplen < ICMP_MASKLEN) 823 break; 824 switch (ip->ip_dst.s_addr) { 825 826 case INADDR_BROADCAST: 827 case INADDR_ANY: 828 icmpdst.sin_addr = ip->ip_src; 829 break; 830 831 default: 832 icmpdst.sin_addr = ip->ip_dst; 833 } 834 ia = (struct in_ifaddr *)ifaof_ifpforaddr( 835 (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif); 836 if (ia == NULL) 837 break; 838 if (ia->ia_ifp == 0) 839 break; 840 icp->icmp_type = ICMP_MASKREPLY; 841 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; 842 if (ip->ip_src.s_addr == 0) { 843 if (ia->ia_ifp->if_flags & IFF_BROADCAST) 844 ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr; 845 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) 846 ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr; 847 } 848 reflect: 849 ip->ip_len += hlen; /* since ip_input deducts this */ 850 icmpstat.icps_reflect++; 851 icmpstat.icps_outhist[icp->icmp_type]++; 852 icmp_reflect(m); 853 return(IPPROTO_DONE); 854 855 case ICMP_REDIRECT: 856 if (log_redirect) { 857 char src_buf[INET_ADDRSTRLEN]; 858 char dst_buf[INET_ADDRSTRLEN]; 859 char gwy_buf[INET_ADDRSTRLEN]; 860 861 kprintf("icmp redirect from %s: %s => %s\n", 862 inet_ntop(AF_INET, &ip->ip_src, 863 src_buf, INET_ADDRSTRLEN), 864 inet_ntop(AF_INET, &icp->icmp_ip.ip_dst, 865 dst_buf, INET_ADDRSTRLEN), 866 inet_ntop(AF_INET, &icp->icmp_gwaddr, 867 gwy_buf, INET_ADDRSTRLEN)); 868 } 869 if (drop_redirect) 870 break; 871 if (code > 3) 872 goto badcode; 873 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || 874 IP_VHL_HL(icp->icmp_ip.ip_vhl) < (sizeof(struct ip) >> 2)) { 875 icmpstat.icps_badlen++; 876 break; 877 } 878 #ifdef ICMPPRINTFS 879 if (icmpprintfs) { 880 char dst_buf[INET_ADDRSTRLEN], gw_buf[INET_ADDRSTRLEN]; 881 882 kprintf("redirect dst %s to %s\n", 883 inet_ntop(AF_INET, &icp->icmp_ip.ip_dst, 884 dst_buf, INET_ADDRSTRLEN), 885 inet_ntop(AF_INET, &icp->icmp_gwaddr, 886 gw_buf, INET_ADDRSTRLEN)); 887 } 888 #endif 889 icmpsrc.sin_addr = icp->icmp_ip.ip_dst; 890 891 /* Run redirect 
in all netisrs */ 892 if (icmp_redirect_start(m, hlen, proto)) { 893 /* Forwarded; done */ 894 return IPPROTO_DONE; 895 } 896 /* Move on; run rip_input() directly */ 897 break; 898 899 /* 900 * No kernel processing for the following; 901 * just fall through to send to raw listener. 902 */ 903 case ICMP_ECHOREPLY: 904 case ICMP_ROUTERADVERT: 905 case ICMP_ROUTERSOLICIT: 906 case ICMP_TSTAMPREPLY: 907 case ICMP_IREQREPLY: 908 case ICMP_MASKREPLY: 909 default: 910 break; 911 } 912 913 raw: 914 *mp = m; 915 rip_input(mp, offp, proto); 916 return(IPPROTO_DONE); 917 918 freeit: 919 m_freem(m); 920 return(IPPROTO_DONE); 921 } 922 923 /* 924 * Reflect the ip packet back to the source 925 */ 926 static void 927 icmp_reflect(struct mbuf *m) 928 { 929 struct ip *ip = mtod(m, struct ip *); 930 struct in_ifaddr *ia; 931 struct in_ifaddr_container *iac; 932 struct ifaddr_container *ifac; 933 struct ifnet *ifp; 934 struct in_addr t; 935 struct mbuf *opts = NULL; 936 int optlen = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof(struct ip); 937 struct route *ro = NULL, rt; 938 939 if (!in_canforward(ip->ip_src) && 940 ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) != 941 (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) { 942 m_freem(m); /* Bad return address */ 943 icmpstat.icps_badaddr++; 944 goto done; /* Ip_output() will check for broadcast */ 945 } 946 t = ip->ip_dst; 947 ip->ip_dst = ip->ip_src; 948 949 ro = &rt; 950 bzero(ro, sizeof *ro); 951 952 /* 953 * If the incoming packet was addressed directly to us, 954 * use dst as the src for the reply. Otherwise (broadcast 955 * or anonymous), use the address which corresponds 956 * to the incoming interface. 
957 */ 958 ia = NULL; 959 LIST_FOREACH(iac, INADDR_HASH(t.s_addr), ia_hash) { 960 if (t.s_addr == IA_SIN(iac->ia)->sin_addr.s_addr) { 961 ia = iac->ia; 962 goto match; 963 } 964 } 965 ifp = m->m_pkthdr.rcvif; 966 if (ifp != NULL && (ifp->if_flags & IFF_BROADCAST)) { 967 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 968 struct ifaddr *ifa = ifac->ifa; 969 970 if (ifa->ifa_addr->sa_family != AF_INET) 971 continue; 972 ia = ifatoia(ifa); 973 if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 974 t.s_addr) 975 goto match; 976 } 977 } 978 /* 979 * If the packet was transiting through us, use the address of 980 * the interface the packet came through in. If that interface 981 * doesn't have a suitable IP address, the normal selection 982 * criteria apply. 983 */ 984 if (icmp_rfi && ifp != NULL) { 985 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 986 struct ifaddr *ifa = ifac->ifa; 987 988 if (ifa->ifa_addr->sa_family != AF_INET) 989 continue; 990 ia = ifatoia(ifa); 991 goto match; 992 } 993 } 994 /* 995 * If the incoming packet was not addressed directly to us, use 996 * designated interface for icmp replies specified by sysctl 997 * net.inet.icmp.reply_src (default not set). Otherwise continue 998 * with normal source selection. 999 */ 1000 if (icmp_reply_src[0] != '\0' && 1001 (ifp = ifunit_netisr(icmp_reply_src))) { 1002 TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) { 1003 struct ifaddr *ifa = ifac->ifa; 1004 1005 if (ifa->ifa_addr->sa_family != AF_INET) 1006 continue; 1007 ia = ifatoia(ifa); 1008 goto match; 1009 } 1010 } 1011 /* 1012 * If the packet was transiting through us, use the address of 1013 * the interface that is the closest to the packet source. 1014 * When we don't have a route back to the packet source, stop here 1015 * and drop the packet. 
1016 */ 1017 ia = ip_rtaddr(ip->ip_dst, ro); 1018 if (ia == NULL) { 1019 m_freem(m); 1020 icmpstat.icps_noroute++; 1021 goto done; 1022 } 1023 match: 1024 t = IA_SIN(ia)->sin_addr; 1025 ip->ip_src = t; 1026 ip->ip_ttl = ip_defttl; 1027 1028 if (optlen > 0) { 1029 u_char *cp; 1030 int opt, cnt; 1031 u_int len; 1032 1033 /* 1034 * Retrieve any source routing from the incoming packet; 1035 * add on any record-route or timestamp options. 1036 */ 1037 cp = (u_char *) (ip + 1); 1038 if ((opts = ip_srcroute(m)) == NULL && 1039 (opts = m_gethdr(M_NOWAIT, MT_HEADER))) { 1040 opts->m_len = sizeof(struct in_addr); 1041 mtod(opts, struct in_addr *)->s_addr = 0; 1042 } 1043 if (opts) { 1044 #ifdef ICMPPRINTFS 1045 if (icmpprintfs) 1046 kprintf("icmp_reflect optlen %d rt %d => ", 1047 optlen, opts->m_len); 1048 #endif 1049 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { 1050 opt = cp[IPOPT_OPTVAL]; 1051 if (opt == IPOPT_EOL) 1052 break; 1053 if (opt == IPOPT_NOP) 1054 len = 1; 1055 else { 1056 if (cnt < IPOPT_OLEN + sizeof *cp) 1057 break; 1058 len = cp[IPOPT_OLEN]; 1059 if (len < IPOPT_OLEN + sizeof *cp || 1060 len > cnt) 1061 break; 1062 } 1063 /* 1064 * Should check for overflow, but it 1065 * "can't happen". 1066 */ 1067 if (opt == IPOPT_RR || opt == IPOPT_TS || 1068 opt == IPOPT_SECURITY) { 1069 bcopy(cp, 1070 mtod(opts, caddr_t) + opts->m_len, 1071 len); 1072 opts->m_len += len; 1073 } 1074 } 1075 /* Terminate & pad, if necessary */ 1076 cnt = opts->m_len % 4; 1077 if (cnt) { 1078 for (; cnt < 4; cnt++) { 1079 *(mtod(opts, caddr_t) + opts->m_len) = 1080 IPOPT_EOL; 1081 opts->m_len++; 1082 } 1083 } 1084 #ifdef ICMPPRINTFS 1085 if (icmpprintfs) 1086 kprintf("%d\n", opts->m_len); 1087 #endif 1088 } 1089 /* 1090 * Now strip out original options by copying rest of first 1091 * mbuf's data back, and adjust the IP length. 
1092 */ 1093 ip->ip_len -= optlen; 1094 ip->ip_vhl = IP_VHL_BORING; 1095 m->m_len -= optlen; 1096 if (m->m_flags & M_PKTHDR) 1097 m->m_pkthdr.len -= optlen; 1098 optlen += sizeof(struct ip); 1099 bcopy((caddr_t)ip + optlen, ip + 1, 1100 m->m_len - sizeof(struct ip)); 1101 } 1102 m->m_pkthdr.fw_flags &= FW_MBUF_GENERATED; 1103 m->m_flags &= ~(M_BCAST|M_MCAST); 1104 icmp_send(m, opts, ro); 1105 done: 1106 if (opts) 1107 m_free(opts); 1108 if (ro && ro->ro_rt) 1109 RTFREE(ro->ro_rt); 1110 } 1111 1112 /* 1113 * Send an icmp packet back to the ip level, 1114 * after supplying a checksum. 1115 */ 1116 static void 1117 icmp_send(struct mbuf *m, struct mbuf *opts, struct route *rt) 1118 { 1119 struct ip *ip = mtod(m, struct ip *); 1120 struct icmp *icp; 1121 int hlen; 1122 1123 hlen = IP_VHL_HL(ip->ip_vhl) << 2; 1124 m->m_data += hlen; 1125 m->m_len -= hlen; 1126 icp = mtod(m, struct icmp *); 1127 icp->icmp_cksum = 0; 1128 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen); 1129 m->m_data -= hlen; 1130 m->m_len += hlen; 1131 m->m_pkthdr.rcvif = NULL; 1132 #ifdef ICMPPRINTFS 1133 if (icmpprintfs) { 1134 char dst_buf[INET_ADDRSTRLEN], src_buf[INET_ADDRSTRLEN]; 1135 1136 kprintf("icmp_send dst %s src %s\n", 1137 inet_ntop(AF_INET, &ip->ip_dst, dst_buf, INET_ADDRSTRLEN), 1138 inet_ntop(AF_INET, &ip->ip_src, src_buf, INET_ADDRSTRLEN)); 1139 } 1140 #endif 1141 ip_output(m, opts, rt, 0, NULL, NULL); 1142 } 1143 1144 n_time 1145 iptime(void) 1146 { 1147 struct timeval atv; 1148 u_long t; 1149 1150 getmicrotime(&atv); 1151 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; 1152 return (htonl(t)); 1153 } 1154 1155 #if 1 1156 /* 1157 * Return the next larger or smaller MTU plateau (table from RFC 1191) 1158 * given current value MTU. If DIR is less than zero, a larger plateau 1159 * is returned; otherwise, a smaller value is returned. 
1160 */ 1161 int 1162 ip_next_mtu(int mtu, int dir) 1163 { 1164 static int mtutab[] = { 1165 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296, 1166 68, 0 1167 }; 1168 int i; 1169 1170 for (i = 0; i < (sizeof mtutab) / (sizeof mtutab[0]); i++) { 1171 if (mtu >= mtutab[i]) 1172 break; 1173 } 1174 1175 if (dir < 0) { 1176 if (i == 0) { 1177 return 0; 1178 } else { 1179 return mtutab[i - 1]; 1180 } 1181 } else { 1182 if (mtutab[i] == 0) { 1183 return 0; 1184 } else if(mtu > mtutab[i]) { 1185 return mtutab[i]; 1186 } else { 1187 return mtutab[i + 1]; 1188 } 1189 } 1190 } 1191 #endif 1192 1193 #ifdef ICMP_BANDLIM 1194 /* 1195 * badport_bandlim() - check for ICMP bandwidth limit 1196 * 1197 * Return 0 if it is ok to send an ICMP error response, -1 if we have 1198 * hit our bandwidth limit and it is not ok. 1199 * 1200 * If icmplim is <= 0, the feature is disabled and 0 is returned. 1201 * 1202 * For now we separate the TCP and UDP subsystems w/ different 'which' 1203 * values. We may eventually remove this separation (and simplify the 1204 * code further). 1205 * 1206 * Note that the printing of the error message is delayed so we can 1207 * properly print the icmp error rate that the system was trying to do 1208 * (i.e. 22000/100 pps, etc...). This can cause long delays in printing 1209 * the 'final' error, but it doesn't make sense to solve the printing 1210 * delay with more complex code. 1211 */ 1212 int 1213 badport_bandlim(int which) 1214 { 1215 static int lticks[BANDLIM_MAX + 1]; 1216 static int lpackets[BANDLIM_MAX + 1]; 1217 int dticks; 1218 const char *bandlimittype[] = { 1219 "Limiting icmp unreach response", 1220 "Limiting icmp ping response", 1221 "Limiting icmp tstamp response", 1222 "Limiting closed port RST response", 1223 "Limiting open port RST response" 1224 }; 1225 1226 /* 1227 * Return ok status if feature disabled or argument out of 1228 * ranage. 
1229 */ 1230 1231 if (icmplim <= 0 || which > BANDLIM_MAX || which < 0) 1232 return(0); 1233 dticks = ticks - lticks[which]; 1234 1235 /* 1236 * reset stats when cumulative dt exceeds one second. 1237 */ 1238 1239 if ((unsigned int)dticks > hz) { 1240 if (lpackets[which] > icmplim && icmplim_output) { 1241 kprintf("%s from %d to %d packets per second\n", 1242 bandlimittype[which], 1243 lpackets[which], 1244 icmplim 1245 ); 1246 } 1247 lticks[which] = ticks; 1248 lpackets[which] = 0; 1249 } 1250 1251 /* 1252 * bump packet count 1253 */ 1254 1255 if (++lpackets[which] > icmplim) { 1256 return(-1); 1257 } 1258 return(0); 1259 } 1260 #endif 1261