1 #ifdef RCSIDENT 2 static char rcsident[] = "$Header: icmp.c,v 1.17 85/06/18 14:53:43 walsh Exp $"; 3 #endif 4 5 #include "../h/param.h" 6 #include "../h/systm.h" 7 #include "../h/mbuf.h" 8 #include "../h/socket.h" 9 #include "../h/socketvar.h" 10 #include "../h/protosw.h" 11 #include "../h/syslog.h" 12 13 #include "../net/route.h" 14 #include "../net/if.h" 15 16 #include "../bbnnet/in.h" 17 #include "../bbnnet/net.h" 18 #include "../bbnnet/in_pcb.h" 19 #include "../bbnnet/in_var.h" 20 21 #include "../bbnnet/ip.h" 22 #include "../bbnnet/icmp.h" 23 #include "../bbnnet/nopcb.h" 24 #ifdef HMPTRAPS 25 #include "../bbnnet/hmp_traps.h" 26 #endif 27 28 #include "../h/errno.h" 29 #include "../h/time.h" 30 #include "../h/kernel.h" 31 32 #ifdef RCSIDENT 33 static char rcsicmphdr[] = RCSICMPHDR; 34 #endif 35 36 extern int nosum; 37 38 #define NICTYPE 17 39 40 /* ICMP message formats */ 41 #define ICBAD 0 /* unimplemented */ 42 #define ICERR 1 /* error format (use header) */ 43 #define ICDAT 2 /* data format (use id) */ 44 #define ICINT 3 /* data format (handle internally) */ 45 46 char icaction[NICTYPE] = 47 { 48 ICDAT, ICBAD, ICBAD, ICERR, ICERR, ICERR, ICBAD, 49 ICBAD, ICINT, ICBAD, ICBAD, ICERR, ICERR, ICINT, 50 ICDAT, ICINT, ICDAT 51 } ; 52 53 #define ICLEN1 (sizeof(struct ip) + ICMPSIZE + sizeof(struct ip) + ICMP_ERRLEN) 54 #define ICLEN2 (sizeof(struct ip) + ICMPSIZE + 3 * sizeof(long)) 55 56 int icpullup[NICTYPE] = 57 { 58 0, /* echo reply */ 59 0, 60 0, 61 ICLEN1, /* unreachable */ 62 ICLEN1, /* source quench */ 63 ICLEN1, /* redirect */ 64 0, 65 0, 66 0, /* echo request */ 67 0, 68 0, 69 ICLEN1, /* time exceeded */ 70 ICLEN1, /* parameter problem */ 71 ICLEN2, /* timestamp */ 72 ICLEN2, /* timestamp reply */ 73 0, /* information request */ 74 0 /* information reply */ 75 } ; 76 77 char icunrch[ICMP_UNRCH_NUM] = 78 { 79 PRC_UNREACH_NET, PRC_UNREACH_HOST, PRC_UNREACH_PROTOCOL, 80 PRC_UNREACH_PORT, PRC_MSGSIZE, PRC_UNREACH_HOST 81 } ; 82 83 struct icmp_stat icmpstat; 84 85 86 u_long iptime() 87 { 88 int s = spl7(); /* berkeley had spl6() */ 89 u_long t; 90 91 t = (time.tv_sec % (24*60*60)) * 1000 + time.tv_usec / 1000; 92 splx(s); 93 return (htonl(t)); 94 } 95 96 know_gateway2 (gaddr, list) 97 u_long gaddr; 98 struct mbuf *list; 99 { 100 register struct rtentry *rt; 101 102 while (list) 103 { 104 rt = mtod(list, struct rtentry *); 105 if ((rt->rt_flags & RTF_GATEWAY) && 106 (rt->rt_dst.sa_family == AF_INET) && 107 (((struct sockaddr_in *) &rt->rt_gateway)->sin_addr.s_addr == gaddr)) 108 return (TRUE); 109 list = list->m_next; 110 } 111 return (FALSE); 112 } 113 114 know_gateway (gaddr) 115 u_long gaddr; 116 { 117 register int i; 118 119 for (i=0 ; i<RTHASHSIZ ; i++) 120 { 121 if (know_gateway2 (gaddr, rthost[i]) || 122 know_gateway2 (gaddr, rtnet[i])) 123 return (TRUE); 124 } 125 return (FALSE); 126 } 127 128 #ifdef BBNPING 129 /* 130 * Note that pinging is done on a per-route basis. 131 * 132 * 1. If a gateway is used by more than one route, then for routes 133 * with no active (measured by new data xfer) tcp connections, 134 * the gateway will be pinged. 135 * It is possible that every PINGTIME/2 seconds a gateway would 136 * be sent multiple icmp ECHO REQUESTS, but that is unlikely (uncommon) 137 * and we can worry about that if it actually proves to be a problem. 138 * 139 * 2. Since the ping count is incremented on a per-route basis, but 140 * ECHO REPLIES are dealt with on a per-address basis, a gateway is 141 * not prematurely pinged out if it is used by more than one active 142 * routing entry. 143 */ 144 145 static check_ping(list) 146 register struct mbuf *list; 147 { 148 register struct rtentry *rt; 149 register struct sockaddr_in *sin; 150 register struct mbuf *next; 151 152 while (list) 153 { 154 rt = mtod(list, struct rtentry *); 155 next = list->m_next; /* in case remove it from list */ 156 157 if ((rt->rt_flags & RTF_GATEWAY) && 158 (rt->rt_dst.sa_family == AF_INET)) 159 { 160 sin = (struct sockaddr_in *) &rt->rt_gateway; 161 if ((rt->rt_refcnt > 0) && (rt->rt_flags & RTF_UP)) 162 { 163 if (rt->irt_pings >= MAXPING) 164 { 165 /* 166 * Too many unanswered pings. re-route 167 * connections using this gateway. Usually, 168 * this happens because the gateway is flooded 169 * with traffic. 170 */ 171 union { u_long ul; u_char c[4]; } a; 172 173 a.ul = sin->sin_addr.s_addr; 174 log(LOG_INFO, "gw %d.%d.%d.%d pinged out\n", 175 a.c[0], a.c[1], a.c[2], a.c[3]); 176 177 rt->irt_pings = 0; 178 ip_gdown(sin->sin_addr.s_addr); 179 } 180 else 181 { 182 /* 183 * Ping him again. 184 * See rcv_ack() for comparison with zero here. 185 */ 186 rt->irt_pings ++; 187 if (rt->irt_pings > 0) 188 { 189 /* 190 * count ping even if doesn't get to 191 * interface (ENOBUFS) or other error 192 * (EHOSTDOWN if no gateway at that 193 * address on an IMP network). 194 */ 195 196 ping (sin->sin_addr); 197 icmpstat.ic_pings ++; 198 } 199 else 200 icmpstat.ic_svpings ++; 201 } 202 } 203 else 204 { 205 if (rt->rt_flags & RTF_REINSTATE) 206 { 207 /* 208 * The gateway pinged out or died at some point. 209 * Let's see if it's back up or if our 210 * re-routing of current connections in ip_gdown 211 * has let it breathe again. Wait a while 212 * before try to use it again. 213 */ 214 rt->irt_gdown --; 215 if (rt->irt_gdown <= 0) 216 { 217 rt->irt_gdown = 0; 218 /* 219 * Wait until we know it's alive 220 * for certain. Ping it. 221 */ 222 ping (sin->sin_addr); 223 } 224 } 225 } 226 } 227 228 list = next; 229 } 230 } 231 232 static reset_ping(list, addr) 233 register struct mbuf *list; 234 register u_long addr; 235 { 236 register struct rtentry *rt; 237 238 while (list) 239 { 240 rt = mtod(list, struct rtentry *); 241 if ((rt->rt_flags & RTF_GATEWAY) && 242 (rt->rt_dst.sa_family == AF_INET)) 243 { 244 if (((struct sockaddr_in *) &rt->rt_gateway)->sin_addr.s_addr == addr) 245 { 246 if (rt->rt_flags & RTF_REINSTATE) 247 { 248 if (rt->irt_gdown == 0) 249 { 250 /* 251 * Was not a slow echo reply. If was dead, 252 * use it again. If was flooded, new connections 253 * can now use it (old shifted away). 254 */ 255 rt->rt_flags |= RTF_UP; 256 rt->rt_flags &= ~RTF_REINSTATE; 257 rt->rt_refcnt --; /* see ip_gdown() */ 258 } 259 } 260 else 261 rt->irt_pings = 0; 262 } 263 } 264 list = list->m_next; 265 } 266 } 267 268 /* 269 * Would be nice if we could use HOSTHASH/NETHASH/0, but the hashing is done 270 * on the destination, not the intermediary gateway. 271 */ 272 got_ping(addr) 273 register u_long addr; 274 { 275 register int i; 276 277 for (i=0 ; i<RTHASHSIZ ; i++) 278 { 279 reset_ping(rthost[i], addr); 280 reset_ping(rtnet[i], addr); 281 } 282 } 283 #endif 284 285 /* 286 * Process ICMP messages. Called directly from ip_input processor. 287 */ 288 icmp(mp) 289 register struct mbuf *mp; 290 { 291 register struct ip *ip; 292 register struct icmp *icp; 293 struct in_ifaddr *ia; 294 int ilen; 295 int prccode; 296 297 icmpstat.ic_total ++; 298 299 /* 300 * see ip_input() 301 */ 302 if ((mp->m_off > MMAXOFF) || 303 (mp->m_len < sizeof(struct ip) + ICMPSIZE)) 304 { 305 if ((mp = m_pullup(mp, sizeof(struct ip) + ICMPSIZE)) == NULL) 306 { 307 icmpstat.ic_tooshort ++; 308 return; 309 } 310 } 311 ip = mtod(mp, struct ip *); 312 icp = (struct icmp *) (ip+1); 313 314 /* 315 * watch for fools sending out broadcast ICMP packets 316 * Don't check against inetifp, since is up to ip_input whether to receive 317 * on some interface rather than send to self for input on dst interface. 318 */ 319 ia = in_iawithaddr(ip->ip_dst, FALSE); 320 if (ia == NULL) 321 { 322 /* drop it */ 323 m_freem(mp); 324 return; 325 } 326 327 /* filter out message types */ 328 329 if (icp->ic_type >= NICTYPE || icaction[icp->ic_type] == ICBAD) 330 { 331 icmpstat.ic_drops++; 332 goto badret; 333 } 334 335 if (mp->m_len < icpullup[icp->ic_type]) 336 { 337 if ((mp = m_pullup(mp, icpullup[icp->ic_type])) == NULL) 338 { 339 icmpstat.ic_tooshort ++; 340 return; 341 } 342 ip = mtod(mp, struct ip *); 343 icp = (struct icmp *) (ip+1); 344 } 345 mp->m_off += sizeof(struct ip); 346 mp->m_len -= sizeof(struct ip); 347 348 ilen = ip->ip_len; 349 350 { 351 register u_short his_sum, our_sum; 352 353 his_sum = (u_short)icp->ic_sum; 354 icp->ic_sum = 0; 355 if (his_sum != (our_sum = (u_short)in_cksum(mp, ilen))) 356 { 357 icmpstat.ic_badsum++; 358 if (! nosum) 359 { 360 /* note that the icmp header doesn't overlap IP */ 361 #ifdef HMPTRAPS 362 /* hmp_trap(T_ICMP_CKSUM, (caddr_t),0); */ 363 #endif 364 inet_cksum_err ("icmp", ip, (u_long) his_sum, (u_long) our_sum); 365 netlog(mp); 366 return; 367 } 368 } 369 } 370 371 /* 372 * Now do any processing. Some messages are handled here, 373 * others are passed up ctlinput path for further processing. 374 */ 375 376 switch (icp->ic_type) 377 { 378 379 case ICMP_UNRCH: /* destination unreachable */ 380 381 if (icp->ic_code < ICMP_UNRCH_NUM) 382 { 383 register int (*ctlfunc)(); 384 385 prccode = icunrch[icp->ic_code]; 386 passup: 387 ctlfunc = ipsw[icp->ic_iphdr.ip_p].ipsw_user->pr_ctlinput; 388 (*ctlfunc) (prccode, (caddr_t) icp); 389 } 390 break; 391 392 case ICMP_SRCQ: /* source quench */ 393 394 /* 395 * At the IP level, we could try to reroute the connection and see if we 396 * come up with a less loaded gateway. Problem with this is that we know 397 * total number of packets sent over a route, not the recent traffic load. 398 */ 399 icmpstat.ic_quenches++; 400 prccode = PRC_QUENCH; 401 #ifdef HMPTRAPS 402 /* hmp_trap(T_ICMP_SRCQ, (caddr_t)0, 0); */ 403 #endif 404 goto passup; 405 406 case ICMP_REDIR: /* redirect */ 407 408 icmpstat.ic_redirects ++; 409 410 /* 411 * Sorry, we only trust the connected set of gateways 412 * that includes gateways installed by the system 413 * manager. 414 */ 415 if (know_gateway(ip->ip_src.s_addr)) 416 { 417 register struct mbuf **table; 418 419 if (icp->ic_code == ICMP_REDIR_NET) 420 { 421 prccode = PRC_REDIRECT_NET; 422 table = rtnet; 423 } 424 else 425 { 426 prccode = PRC_REDIRECT_HOST; 427 table = rthost; 428 } 429 if (icmp_redirect_route (icp, table)) 430 goto passup; 431 } 432 else 433 { 434 /* 435 * Who are you? Why are you talking to us? 436 * And how do we know the ip source isn't a lie? 437 * (Eg., Catches Symbolics redirection of subnet broadcast.) 438 */ 439 union { u_long ul; u_char c[4]; } a; 440 441 a.ul = ip->ip_src.s_addr; 442 log(LOG_INFO, "Ignoring redirect from %d.%d.%d.%d\n", 443 a.c[0], a.c[1], a.c[2], a.c[3]); 444 } 445 #ifdef HMPTRAPS 446 /* hmp_trap(T_ICMP_REDIR, (caddr_t)0,0); */ 447 #endif 448 break; 449 450 case ICMP_ECHO: /* echo */ 451 452 icp->ic_type = ICMP_ECHOR; 453 icmpstat.ic_echoes++; 454 goto loopback; 455 456 case ICMP_ECHOR: /* echo reply */ 457 458 /* check for gateway ping packets, look for 459 * corresponding gateway entry and set echo count 460 * to zero. 461 */ 462 #ifdef BBNPING 463 if (icp->ic_id == MY_ECHO_ID) 464 got_ping(ip->ip_src.s_addr); 465 #endif 466 break; 467 468 case ICMP_TIMEX: /* time exceeded */ 469 /* 470 * IP time to live field should be associated with the route so 471 * that it can be dynamically adjusted for time exceeded in transit. 472 * If did, would only need to "pass time exceeded in reassembly" 473 * up to protocol (TCP) so that it can better try to avoid IP 474 * fragmentation. 475 */ 476 icmpstat.ic_timex++; 477 prccode = (icp->ic_code == ICMP_TIMEX_XMT) 478 ? PRC_TIMXCEED_INTRANS 479 : PRC_TIMXCEED_REASS; 480 #ifdef HMPTRAPS 481 /* hmp_trap(T_ICMP_TIMEX, (caddr_t)0,0); */ 482 #endif 483 goto passup; 484 485 case ICMP_TIMES: /* timestamp */ 486 487 if (icp->ic_code == 0) 488 { 489 icp->ic_type = ICMP_TIMESR; 490 /* 491 * Can now do timestamps in UT 492 * 493 icp->ic_trcv = (long)time.tv_sec | 0x80; 494 icp->ic_txmt = (long)time.tv_sec | 0x80; 495 */ 496 icp->ic_txmt = icp->ic_trcv = iptime(); 497 goto loopback; 498 } 499 break; 500 501 case ICMP_INFO: /* info request */ 502 /* 503 * He knows his host number, but not his network #, 504 * fill in src & dst as he would have, had he known. 505 */ 506 { 507 register struct in_ifaddr *inaddress; 508 extern struct ifnet *inetifp; 509 510 icp->ic_type = ICMP_INFOR; 511 inaddress = in_iafromif(inetifp); 512 ip->ip_src.s_addr |= inaddress->ia_subnet; 513 ip->ip_dst = redir_addr(ip); 514 } 515 goto loopback; 516 517 case ICMP_PARM: /* parameter problem */ 518 icmpstat.ic_parm++; 519 prccode = PRC_PARAMPROB; 520 #ifdef HMPTRAPS 521 /* hmp_trap(T_ICMP_PARM, (caddr_t)0,0); */ 522 #endif 523 goto passup; 524 } 525 526 badret : 527 m_freem(mp); 528 return; 529 530 loopback : 531 { 532 struct in_addr temp; 533 register int error; 534 535 temp = ip->ip_src; 536 ip->ip_src = ip->ip_dst; 537 ip->ip_dst = temp; 538 /* ip->ip_p = IPPROTO_ICMP; still is from input */ 539 /* ip->ip_tos = 0; use same tos for reply */ 540 541 icp->ic_sum = in_cksum(mp, ilen); 542 mp->m_off -= sizeof(struct ip); 543 mp->m_len += sizeof(struct ip); 544 NOPCB_IPSEND (mp, (int)ip->ip_len, FALSE, error); 545 546 #ifdef lint 547 error = error; 548 #endif 549 550 } 551 } 552 553 554 /* 555 * Ping gateways in use to see if they are still alive. 556 */ 557 ic_timeo() 558 { 559 #ifdef BBNPING 560 register int i; 561 register int level; 562 static int ictimer; 563 564 if (--ictimer > 0) 565 return; 566 ictimer = PINGTIME; 567 568 level = splnet(); 569 for (i=0 ; i<RTHASHSIZ ; i++) 570 { 571 check_ping(rthost[i]); 572 check_ping(rtnet[i]); 573 } 574 splx(level); 575 #endif 576 } 577 578 static struct rtentry *rtfind (dst, via, table) 579 struct in_addr dst; 580 struct in_addr via; 581 struct mbuf *table[]; 582 { 583 register struct mbuf *m; 584 585 struct rtentry *rt; 586 587 if (table == rthost) 588 m = rthost[HOSTHASH(dst.s_addr) % RTHASHSIZ]; 589 else 590 { 591 if (dst.s_addr) 592 { 593 m = rtnet[NETHASH(dst) % RTHASHSIZ]; 594 dst.s_addr = iptonet(dst); 595 } 596 else 597 m = rtnet[0]; 598 } 599 600 while (m) 601 { 602 struct in_addr d, g; 603 604 rt = mtod(m, struct rtentry *); 605 d = satoipa(&rt->rt_dst); 606 g = satoipa(&rt->rt_gateway); 607 if ((d.s_addr == dst.s_addr) && 608 (g.s_addr == via.s_addr) && 609 (rt->rt_dst.sa_family == AF_INET)) 610 { 611 /* then, hash values must be same. */ 612 return (rt); 613 } 614 615 m = m->m_next; 616 } 617 618 return (NULL); 619 } 620 621 622 icmp_redirect_route (ic, table) 623 struct icmp *ic; 624 struct mbuf *table[]; 625 { 626 struct ip *ip; 627 int flags; 628 static struct sockaddr_in red_dst = { AF_INET } ; 629 static struct sockaddr_in red_gtw = { AF_INET } ; 630 631 ip = (struct ip *) ic->ic_data; 632 /* 633 * 1. Make new routing entry so that new connections will use better 634 * route. But only make entry if have not already done so. 635 */ 636 if (!rtfind(ip->ip_dst, ic->ic_gaddr, table)) 637 { 638 char *err; 639 640 /* check reasonableness of redirect */ 641 642 if (in_iawithnet(ic->ic_gaddr) == NULL) 643 { 644 /* 645 * Sorry, can't get there from here. 646 */ 647 union { u_long ul; u_char c[4]; } g, f, t, v; 648 649 err = "No interface for first hop"; 650 perr : 651 652 g.ul = (((struct ip *) (((char *) ic) - sizeof(struct ip)))->ip_src.s_addr); 653 f.ul = ip->ip_src.s_addr; 654 t.ul = ip->ip_dst.s_addr; 655 v.ul = ic->ic_gaddr.s_addr; 656 log(LOG_INFO, 657 "Ignoring ICMP redirect from gw %d.%d.%d.%d? to go from %d.%d.%d.%d to %d.%d.%d.%d via %d.%d.%d.%d : %s\n", 658 g.c[0], g.c[1], g.c[2], g.c[3], 659 f.c[0], f.c[1], f.c[2], f.c[3], 660 t.c[0], t.c[1], t.c[2], t.c[3], 661 v.c[0], v.c[1], v.c[2], v.c[3], 662 err); 663 664 return (FALSE); 665 } 666 667 if (in_iawithaddr(ic->ic_gaddr, TRUE)) 668 { 669 /* 670 * redirect to self is stupid, as is redirect to 671 * broadcast address (which if_iawithaddr will match 672 * for interfaces with IFF_BROADCAST set.) 673 */ 674 err = "redirected to self"; 675 goto perr; 676 } 677 678 if (iptonet(ic->ic_gaddr) != iptonet(ip->ip_src)) 679 { 680 /* 681 * Why is this gateway redirecting us? It is not 682 * giving us a first hop gateway that is on the 683 * local net that we advertise. 684 */ 685 err = "new first hop net <> src net"; 686 goto perr; 687 } 688 689 #ifdef done_in_icmp_c 690 if (! know_gateway(icmp source)) 691 /* 692 * Sorry, we only trust the connected set of gateways 693 * that includes gateways installed by the system 694 * manager. Who are you? Why are you talking to us? 695 */ 696 return; 697 #endif 698 699 /* o.k., I'll believe it */ 700 flags = RTF_UP; 701 if (table == rthost) 702 { 703 flags |= RTF_HOST; 704 red_dst.sin_addr.s_addr = ip->ip_dst.s_addr; 705 } 706 else 707 { 708 flags |= RTF_GATEWAY; 709 red_dst.sin_addr.s_addr = iptonet(ip->ip_dst); 710 } 711 red_gtw.sin_addr.s_addr = ic->ic_gaddr.s_addr; 712 rtinit ((struct sockaddr *) &red_dst, 713 (struct sockaddr *) &red_gtw, 714 flags); 715 } 716 return (TRUE); 717 } 718 719 icmp_redirect_inp(inp, ic, table) 720 struct inpcb *inp; 721 struct icmp *ic; 722 struct mbuf **table; 723 { 724 struct rtentry *rt; 725 726 /* 727 * 2. Redirect current connection. 728 */ 729 730 #ifdef neverdef 731 /* 732 * This would try to balance load across gateways, but 733 * that's something best done by the gateway before it 734 * sends a redirect. Also, consider 3 gateways of which 735 * two are bad, and possibility of bouncing between the 736 * two bad ones until their use counts got high enough. 737 * 738 * Currently, gateways only take into account # hops, not 739 * load. 740 */ 741 if (rt = inp->inp_route.ro_rt) 742 { 743 short oflags; 744 745 /* try to force a different path */ 746 oflags = rt->rt_flags; 747 rt->rt_flags &= ~RTF_UP; 748 /* but don't lose current route */ 749 rt->rt_refcnt ++; 750 (void) ip_reroute (inp); 751 rt->rt_refcnt --; 752 rt->rt_flags = oflags; 753 } 754 #endif 755 if (rt = rtfind (ic->ic_iphdr.ip_dst, ic->ic_gaddr, table)) 756 { 757 if (rt->rt_flags & RTF_UP) 758 { 759 /* 760 * packets go out an interface with our local 761 * IP address. Know true from checks after 762 * first call to rtfind above. 763 * 764 * Interface has to be at least as up as 765 * for previous route, so don't bother to 766 * check. 767 */ 768 if (inp->inp_route.ro_rt) 769 rtfree (inp->inp_route.ro_rt); 770 inp->inp_route.ro_rt = rt; 771 rt->rt_refcnt ++; 772 } 773 else 774 log(LOG_INFO, "ICMP Redirect to down route\n"); 775 } 776 else 777 log(LOG_INFO, "ICMP Redirect route not installed?\n"); 778 } 779