1 #ifdef RCSIDENT
2 static char rcsident[] = "$Header: icmp.c,v 1.17 85/06/18 14:53:43 walsh Exp $";
3 #endif
4
5 #include "../h/param.h"
6 #include "../h/systm.h"
7 #include "../h/mbuf.h"
8 #include "../h/socket.h"
9 #include "../h/socketvar.h"
10 #include "../h/protosw.h"
11 #include "../h/syslog.h"
12
13 #include "../net/route.h"
14 #include "../net/if.h"
15
16 #include "../bbnnet/in.h"
17 #include "../bbnnet/net.h"
18 #include "../bbnnet/in_pcb.h"
19 #include "../bbnnet/in_var.h"
20
21 #include "../bbnnet/ip.h"
22 #include "../bbnnet/icmp.h"
23 #include "../bbnnet/nopcb.h"
24 #ifdef HMPTRAPS
25 #include "../bbnnet/hmp_traps.h"
26 #endif
27
28 #include "../h/errno.h"
29 #include "../h/time.h"
30 #include "../h/kernel.h"
31
32 #ifdef RCSIDENT
33 static char rcsicmphdr[] = RCSICMPHDR;
34 #endif
35
36 extern int nosum;
37
38 #define NICTYPE 17
39
40 /* ICMP message formats */
41 #define ICBAD 0 /* unimplemented */
42 #define ICERR 1 /* error format (use header) */
43 #define ICDAT 2 /* data format (use id) */
44 #define ICINT 3 /* data format (handle internally) */
45
46 char icaction[NICTYPE] =
47 {
48 ICDAT, ICBAD, ICBAD, ICERR, ICERR, ICERR, ICBAD,
49 ICBAD, ICINT, ICBAD, ICBAD, ICERR, ICERR, ICINT,
50 ICDAT, ICINT, ICDAT
51 } ;
52
53 #define ICLEN1 (sizeof(struct ip) + ICMPSIZE + sizeof(struct ip) + ICMP_ERRLEN)
54 #define ICLEN2 (sizeof(struct ip) + ICMPSIZE + 3 * sizeof(long))
55
56 int icpullup[NICTYPE] =
57 {
58 0, /* echo reply */
59 0,
60 0,
61 ICLEN1, /* unreachable */
62 ICLEN1, /* source quench */
63 ICLEN1, /* redirect */
64 0,
65 0,
66 0, /* echo request */
67 0,
68 0,
69 ICLEN1, /* time exceeded */
70 ICLEN1, /* parameter problem */
71 ICLEN2, /* timestamp */
72 ICLEN2, /* timestamp reply */
73 0, /* information request */
74 0 /* information reply */
75 } ;
76
77 char icunrch[ICMP_UNRCH_NUM] =
78 {
79 PRC_UNREACH_NET, PRC_UNREACH_HOST, PRC_UNREACH_PROTOCOL,
80 PRC_UNREACH_PORT, PRC_MSGSIZE, PRC_UNREACH_HOST
81 } ;
82
83 struct icmp_stat icmpstat;
84
85
iptime()86 u_long iptime()
87 {
88 int s = spl7(); /* berkeley had spl6() */
89 u_long t;
90
91 t = (time.tv_sec % (24*60*60)) * 1000 + time.tv_usec / 1000;
92 splx(s);
93 return (htonl(t));
94 }
95
know_gateway2(gaddr,list)96 know_gateway2 (gaddr, list)
97 u_long gaddr;
98 struct mbuf *list;
99 {
100 register struct rtentry *rt;
101
102 while (list)
103 {
104 rt = mtod(list, struct rtentry *);
105 if ((rt->rt_flags & RTF_GATEWAY) &&
106 (rt->rt_dst.sa_family == AF_INET) &&
107 (((struct sockaddr_in *) &rt->rt_gateway)->sin_addr.s_addr == gaddr))
108 return (TRUE);
109 list = list->m_next;
110 }
111 return (FALSE);
112 }
113
know_gateway(gaddr)114 know_gateway (gaddr)
115 u_long gaddr;
116 {
117 register int i;
118
119 for (i=0 ; i<RTHASHSIZ ; i++)
120 {
121 if (know_gateway2 (gaddr, rthost[i]) ||
122 know_gateway2 (gaddr, rtnet[i]))
123 return (TRUE);
124 }
125 return (FALSE);
126 }
127
128 #ifdef BBNPING
129 /*
130 * Note that pinging is done on a per-route basis.
131 *
132 * 1. If a gateway is used by more than one route, then for routes
133 * with no active (measured by new data xfer) tcp connections,
134 * the gateway will be pinged.
135 * It is possible that every PINGTIME/2 seconds a gateway would
136 * be sent multiple icmp ECHO REQUESTS, but that is unlikely (uncommon)
137 * and we can worry about that if it actually proves to be a problem.
138 *
139 * 2. Since the ping count is incremented on a per-route basis, but
140 * ECHO REPLIES are dealt with on a per-address basis, a gateway is
141 * not prematurely pinged out if it is used by more than one active
142 * routing entry.
143 */
144
check_ping(list)145 static check_ping(list)
146 register struct mbuf *list;
147 {
148 register struct rtentry *rt;
149 register struct sockaddr_in *sin;
150 register struct mbuf *next;
151
152 while (list)
153 {
154 rt = mtod(list, struct rtentry *);
155 next = list->m_next; /* in case remove it from list */
156
157 if ((rt->rt_flags & RTF_GATEWAY) &&
158 (rt->rt_dst.sa_family == AF_INET))
159 {
160 sin = (struct sockaddr_in *) &rt->rt_gateway;
161 if ((rt->rt_refcnt > 0) && (rt->rt_flags & RTF_UP))
162 {
163 if (rt->irt_pings >= MAXPING)
164 {
165 /*
166 * Too many unanswered pings. re-route
167 * connections using this gateway. Usually,
168 * this happens because the gateway is flooded
169 * with traffic.
170 */
171 union { u_long ul; u_char c[4]; } a;
172
173 a.ul = sin->sin_addr.s_addr;
174 log(LOG_INFO, "gw %d.%d.%d.%d pinged out\n",
175 a.c[0], a.c[1], a.c[2], a.c[3]);
176
177 rt->irt_pings = 0;
178 ip_gdown(sin->sin_addr.s_addr);
179 }
180 else
181 {
182 /*
183 * Ping him again.
184 * See rcv_ack() for comparison with zero here.
185 */
186 rt->irt_pings ++;
187 if (rt->irt_pings > 0)
188 {
189 /*
190 * count ping even if doesn't get to
191 * interface (ENOBUFS) or other error
192 * (EHOSTDOWN if no gateway at that
193 * address on an IMP network).
194 */
195
196 ping (sin->sin_addr);
197 icmpstat.ic_pings ++;
198 }
199 else
200 icmpstat.ic_svpings ++;
201 }
202 }
203 else
204 {
205 if (rt->rt_flags & RTF_REINSTATE)
206 {
207 /*
208 * The gateway pinged out or died at some point.
209 * Let's see if it's back up or if our
210 * re-routing of current connections in ip_gdown
211 * has let it breathe again. Wait a while
212 * before try to use it again.
213 */
214 rt->irt_gdown --;
215 if (rt->irt_gdown <= 0)
216 {
217 rt->irt_gdown = 0;
218 /*
219 * Wait until we know it's alive
220 * for certain. Ping it.
221 */
222 ping (sin->sin_addr);
223 }
224 }
225 }
226 }
227
228 list = next;
229 }
230 }
231
reset_ping(list,addr)232 static reset_ping(list, addr)
233 register struct mbuf *list;
234 register u_long addr;
235 {
236 register struct rtentry *rt;
237
238 while (list)
239 {
240 rt = mtod(list, struct rtentry *);
241 if ((rt->rt_flags & RTF_GATEWAY) &&
242 (rt->rt_dst.sa_family == AF_INET))
243 {
244 if (((struct sockaddr_in *) &rt->rt_gateway)->sin_addr.s_addr == addr)
245 {
246 if (rt->rt_flags & RTF_REINSTATE)
247 {
248 if (rt->irt_gdown == 0)
249 {
250 /*
251 * Was not a slow echo reply. If was dead,
252 * use it again. If was flooded, new connections
253 * can now use it (old shifted away).
254 */
255 rt->rt_flags |= RTF_UP;
256 rt->rt_flags &= ~RTF_REINSTATE;
257 rt->rt_refcnt --; /* see ip_gdown() */
258 }
259 }
260 else
261 rt->irt_pings = 0;
262 }
263 }
264 list = list->m_next;
265 }
266 }
267
268 /*
269 * Would be nice if we could use HOSTHASH/NETHASH/0, but the hashing is done
270 * on the destination, not the intermediary gateway.
271 */
got_ping(addr)272 got_ping(addr)
273 register u_long addr;
274 {
275 register int i;
276
277 for (i=0 ; i<RTHASHSIZ ; i++)
278 {
279 reset_ping(rthost[i], addr);
280 reset_ping(rtnet[i], addr);
281 }
282 }
283 #endif
284
285 /*
286 * Process ICMP messages. Called directly from ip_input processor.
287 */
icmp(mp)288 icmp(mp)
289 register struct mbuf *mp;
290 {
291 register struct ip *ip;
292 register struct icmp *icp;
293 struct in_ifaddr *ia;
294 int ilen;
295 int prccode;
296
297 icmpstat.ic_total ++;
298
299 /*
300 * see ip_input()
301 */
302 if ((mp->m_off > MMAXOFF) ||
303 (mp->m_len < sizeof(struct ip) + ICMPSIZE))
304 {
305 if ((mp = m_pullup(mp, sizeof(struct ip) + ICMPSIZE)) == NULL)
306 {
307 icmpstat.ic_tooshort ++;
308 return;
309 }
310 }
311 ip = mtod(mp, struct ip *);
312 icp = (struct icmp *) (ip+1);
313
314 /*
315 * watch for fools sending out broadcast ICMP packets
316 * Don't check against inetifp, since is up to ip_input whether to receive
317 * on some interface rather than send to self for input on dst interface.
318 */
319 ia = in_iawithaddr(ip->ip_dst, FALSE);
320 if (ia == NULL)
321 {
322 /* drop it */
323 m_freem(mp);
324 return;
325 }
326
327 /* filter out message types */
328
329 if (icp->ic_type >= NICTYPE || icaction[icp->ic_type] == ICBAD)
330 {
331 icmpstat.ic_drops++;
332 goto badret;
333 }
334
335 if (mp->m_len < icpullup[icp->ic_type])
336 {
337 if ((mp = m_pullup(mp, icpullup[icp->ic_type])) == NULL)
338 {
339 icmpstat.ic_tooshort ++;
340 return;
341 }
342 ip = mtod(mp, struct ip *);
343 icp = (struct icmp *) (ip+1);
344 }
345 mp->m_off += sizeof(struct ip);
346 mp->m_len -= sizeof(struct ip);
347
348 ilen = ip->ip_len;
349
350 {
351 register u_short his_sum, our_sum;
352
353 his_sum = (u_short)icp->ic_sum;
354 icp->ic_sum = 0;
355 if (his_sum != (our_sum = (u_short)in_cksum(mp, ilen)))
356 {
357 icmpstat.ic_badsum++;
358 if (! nosum)
359 {
360 /* note that the icmp header doesn't overlap IP */
361 #ifdef HMPTRAPS
362 /* hmp_trap(T_ICMP_CKSUM, (caddr_t),0); */
363 #endif
364 inet_cksum_err ("icmp", ip, (u_long) his_sum, (u_long) our_sum);
365 netlog(mp);
366 return;
367 }
368 }
369 }
370
371 /*
372 * Now do any processing. Some messages are handled here,
373 * others are passed up ctlinput path for further processing.
374 */
375
376 switch (icp->ic_type)
377 {
378
379 case ICMP_UNRCH: /* destination unreachable */
380
381 if (icp->ic_code < ICMP_UNRCH_NUM)
382 {
383 register int (*ctlfunc)();
384
385 prccode = icunrch[icp->ic_code];
386 passup:
387 ctlfunc = ipsw[icp->ic_iphdr.ip_p].ipsw_user->pr_ctlinput;
388 (*ctlfunc) (prccode, (caddr_t) icp);
389 }
390 break;
391
392 case ICMP_SRCQ: /* source quench */
393
394 /*
395 * At the IP level, we could try to reroute the connection and see if we
396 * come up with a less loaded gateway. Problem with this is that we know
397 * total number of packets sent over a route, not the recent traffic load.
398 */
399 icmpstat.ic_quenches++;
400 prccode = PRC_QUENCH;
401 #ifdef HMPTRAPS
402 /* hmp_trap(T_ICMP_SRCQ, (caddr_t)0, 0); */
403 #endif
404 goto passup;
405
406 case ICMP_REDIR: /* redirect */
407
408 icmpstat.ic_redirects ++;
409
410 /*
411 * Sorry, we only trust the connected set of gateways
412 * that includes gateways installed by the system
413 * manager.
414 */
415 if (know_gateway(ip->ip_src.s_addr))
416 {
417 register struct mbuf **table;
418
419 if (icp->ic_code == ICMP_REDIR_NET)
420 {
421 prccode = PRC_REDIRECT_NET;
422 table = rtnet;
423 }
424 else
425 {
426 prccode = PRC_REDIRECT_HOST;
427 table = rthost;
428 }
429 if (icmp_redirect_route (icp, table))
430 goto passup;
431 }
432 else
433 {
434 /*
435 * Who are you? Why are you talking to us?
436 * And how do we know the ip source isn't a lie?
437 * (Eg., Catches Symbolics redirection of subnet broadcast.)
438 */
439 union { u_long ul; u_char c[4]; } a;
440
441 a.ul = ip->ip_src.s_addr;
442 log(LOG_INFO, "Ignoring redirect from %d.%d.%d.%d\n",
443 a.c[0], a.c[1], a.c[2], a.c[3]);
444 }
445 #ifdef HMPTRAPS
446 /* hmp_trap(T_ICMP_REDIR, (caddr_t)0,0); */
447 #endif
448 break;
449
450 case ICMP_ECHO: /* echo */
451
452 icp->ic_type = ICMP_ECHOR;
453 icmpstat.ic_echoes++;
454 goto loopback;
455
456 case ICMP_ECHOR: /* echo reply */
457
458 /* check for gateway ping packets, look for
459 * corresponding gateway entry and set echo count
460 * to zero.
461 */
462 #ifdef BBNPING
463 if (icp->ic_id == MY_ECHO_ID)
464 got_ping(ip->ip_src.s_addr);
465 #endif
466 break;
467
468 case ICMP_TIMEX: /* time exceeded */
469 /*
470 * IP time to live field should be associated with the route so
471 * that it can be dynamically adjusted for time exceeded in transit.
472 * If did, would only need to "pass time exceeded in reassembly"
473 * up to protocol (TCP) so that it can better try to avoid IP
474 * fragmentation.
475 */
476 icmpstat.ic_timex++;
477 prccode = (icp->ic_code == ICMP_TIMEX_XMT)
478 ? PRC_TIMXCEED_INTRANS
479 : PRC_TIMXCEED_REASS;
480 #ifdef HMPTRAPS
481 /* hmp_trap(T_ICMP_TIMEX, (caddr_t)0,0); */
482 #endif
483 goto passup;
484
485 case ICMP_TIMES: /* timestamp */
486
487 if (icp->ic_code == 0)
488 {
489 icp->ic_type = ICMP_TIMESR;
490 /*
491 * Can now do timestamps in UT
492 *
493 icp->ic_trcv = (long)time.tv_sec | 0x80;
494 icp->ic_txmt = (long)time.tv_sec | 0x80;
495 */
496 icp->ic_txmt = icp->ic_trcv = iptime();
497 goto loopback;
498 }
499 break;
500
501 case ICMP_INFO: /* info request */
502 /*
503 * He knows his host number, but not his network #,
504 * fill in src & dst as he would have, had he known.
505 */
506 {
507 register struct in_ifaddr *inaddress;
508 extern struct ifnet *inetifp;
509
510 icp->ic_type = ICMP_INFOR;
511 inaddress = in_iafromif(inetifp);
512 ip->ip_src.s_addr |= inaddress->ia_subnet;
513 ip->ip_dst = redir_addr(ip);
514 }
515 goto loopback;
516
517 case ICMP_PARM: /* parameter problem */
518 icmpstat.ic_parm++;
519 prccode = PRC_PARAMPROB;
520 #ifdef HMPTRAPS
521 /* hmp_trap(T_ICMP_PARM, (caddr_t)0,0); */
522 #endif
523 goto passup;
524 }
525
526 badret :
527 m_freem(mp);
528 return;
529
530 loopback :
531 {
532 struct in_addr temp;
533 register int error;
534
535 temp = ip->ip_src;
536 ip->ip_src = ip->ip_dst;
537 ip->ip_dst = temp;
538 /* ip->ip_p = IPPROTO_ICMP; still is from input */
539 /* ip->ip_tos = 0; use same tos for reply */
540
541 icp->ic_sum = in_cksum(mp, ilen);
542 mp->m_off -= sizeof(struct ip);
543 mp->m_len += sizeof(struct ip);
544 NOPCB_IPSEND (mp, (int)ip->ip_len, FALSE, error);
545
546 #ifdef lint
547 error = error;
548 #endif
549
550 }
551 }
552
553
/*
 * Ping gateways in use to see if they are still alive.
 *
 * Intended to be called periodically.  Without BBNPING this is a no-op.
 * With it, the routing tables are scanned on the first call and then on
 * every PINGTIME-th call after that; the scan runs check_ping() over
 * every bucket of both tables at splnet.
 */
ic_timeo()
{
#ifdef BBNPING
	register int i;
	register int level;
	static int ictimer;	/* calls left before the next scan */

	if (--ictimer > 0)
		return;
	ictimer = PINGTIME;

	level = splnet();
	for (i=0 ; i<RTHASHSIZ ; i++)
	{
		check_ping(rthost[i]);
		check_ping(rtnet[i]);
	}
	splx(level);
#endif
}

rtfind(dst,via,table)578 static struct rtentry *rtfind (dst, via, table)
579 struct in_addr dst;
580 struct in_addr via;
581 struct mbuf *table[];
582 {
583 register struct mbuf *m;
584
585 struct rtentry *rt;
586
587 if (table == rthost)
588 m = rthost[HOSTHASH(dst.s_addr) % RTHASHSIZ];
589 else
590 {
591 if (dst.s_addr)
592 {
593 m = rtnet[NETHASH(dst) % RTHASHSIZ];
594 dst.s_addr = iptonet(dst);
595 }
596 else
597 m = rtnet[0];
598 }
599
600 while (m)
601 {
602 struct in_addr d, g;
603
604 rt = mtod(m, struct rtentry *);
605 d = satoipa(&rt->rt_dst);
606 g = satoipa(&rt->rt_gateway);
607 if ((d.s_addr == dst.s_addr) &&
608 (g.s_addr == via.s_addr) &&
609 (rt->rt_dst.sa_family == AF_INET))
610 {
611 /* then, hash values must be same. */
612 return (rt);
613 }
614
615 m = m->m_next;
616 }
617
618 return (NULL);
619 }
620
621
622 icmp_redirect_route (ic, table)
623 struct icmp *ic;
624 struct mbuf *table[];
625 {
626 struct ip *ip;
627 int flags;
628 static struct sockaddr_in red_dst = { AF_INET } ;
629 static struct sockaddr_in red_gtw = { AF_INET } ;
630
631 ip = (struct ip *) ic->ic_data;
632 /*
633 * 1. Make new routing entry so that new connections will use better
634 * route. But only make entry if have not already done so.
635 */
636 if (!rtfind(ip->ip_dst, ic->ic_gaddr, table))
637 {
638 char *err;
639
640 /* check reasonableness of redirect */
641
642 if (in_iawithnet(ic->ic_gaddr) == NULL)
643 {
644 /*
645 * Sorry, can't get there from here.
646 */
647 union { u_long ul; u_char c[4]; } g, f, t, v;
648
649 err = "No interface for first hop";
650 perr :
651
652 g.ul = (((struct ip *) (((char *) ic) - sizeof(struct ip)))->ip_src.s_addr);
653 f.ul = ip->ip_src.s_addr;
654 t.ul = ip->ip_dst.s_addr;
655 v.ul = ic->ic_gaddr.s_addr;
656 log(LOG_INFO,
657 "Ignoring ICMP redirect from gw %d.%d.%d.%d? to go from %d.%d.%d.%d to %d.%d.%d.%d via %d.%d.%d.%d : %s\n",
658 g.c[0], g.c[1], g.c[2], g.c[3],
659 f.c[0], f.c[1], f.c[2], f.c[3],
660 t.c[0], t.c[1], t.c[2], t.c[3],
661 v.c[0], v.c[1], v.c[2], v.c[3],
662 err);
663
664 return (FALSE);
665 }
666
667 if (in_iawithaddr(ic->ic_gaddr, TRUE))
668 {
669 /*
670 * redirect to self is stupid, as is redirect to
671 * broadcast address (which if_iawithaddr will match
672 * for interfaces with IFF_BROADCAST set.)
673 */
674 err = "redirected to self";
675 goto perr;
676 }
677
678 if (iptonet(ic->ic_gaddr) != iptonet(ip->ip_src))
679 {
680 /*
681 * Why is this gateway redirecting us? It is not
682 * giving us a first hop gateway that is on the
683 * local net that we advertise.
684 */
685 err = "new first hop net <> src net";
686 goto perr;
687 }
688
689 #ifdef done_in_icmp_c
690 if (! know_gateway(icmp source))
691 /*
692 * Sorry, we only trust the connected set of gateways
693 * that includes gateways installed by the system
694 * manager. Who are you? Why are you talking to us?
695 */
696 return;
697 #endif
698
699 /* o.k., I'll believe it */
700 flags = RTF_UP;
701 if (table == rthost)
702 {
703 flags |= RTF_HOST;
704 red_dst.sin_addr.s_addr = ip->ip_dst.s_addr;
705 }
706 else
707 {
708 flags |= RTF_GATEWAY;
709 red_dst.sin_addr.s_addr = iptonet(ip->ip_dst);
710 }
711 red_gtw.sin_addr.s_addr = ic->ic_gaddr.s_addr;
712 rtinit ((struct sockaddr *) &red_dst,
713 (struct sockaddr *) &red_gtw,
714 flags);
715 }
716 return (TRUE);
717 }
718
719 icmp_redirect_inp(inp, ic, table)
720 struct inpcb *inp;
721 struct icmp *ic;
722 struct mbuf **table;
723 {
724 struct rtentry *rt;
725
726 /*
727 * 2. Redirect current connection.
728 */
729
730 #ifdef neverdef
731 /*
732 * This would try to balance load across gateways, but
733 * that's something best done by the gateway before it
734 * sends a redirect. Also, consider 3 gateways of which
735 * two are bad, and possibility of bouncing between the
736 * two bad ones until their use counts got high enough.
737 *
738 * Currently, gateways only take into account # hops, not
739 * load.
740 */
741 if (rt = inp->inp_route.ro_rt)
742 {
743 short oflags;
744
745 /* try to force a different path */
746 oflags = rt->rt_flags;
747 rt->rt_flags &= ~RTF_UP;
748 /* but don't lose current route */
749 rt->rt_refcnt ++;
750 (void) ip_reroute (inp);
751 rt->rt_refcnt --;
752 rt->rt_flags = oflags;
753 }
754 #endif
755 if (rt = rtfind (ic->ic_iphdr.ip_dst, ic->ic_gaddr, table))
756 {
757 if (rt->rt_flags & RTF_UP)
758 {
759 /*
760 * packets go out an interface with our local
761 * IP address. Know true from checks after
762 * first call to rtfind above.
763 *
764 * Interface has to be at least as up as
765 * for previous route, so don't bother to
766 * check.
767 */
768 if (inp->inp_route.ro_rt)
769 rtfree (inp->inp_route.ro_rt);
770 inp->inp_route.ro_rt = rt;
771 rt->rt_refcnt ++;
772 }
773 else
774 log(LOG_INFO, "ICMP Redirect to down route\n");
775 }
776 else
777 log(LOG_INFO, "ICMP Redirect route not installed?\n");
778 }
779